diff --git a/ja/layouts/shortcodes/get-metrics-from-git.html b/ja/layouts/shortcodes/get-metrics-from-git.html index eb950303487a6..26658e7c81ce4 100644 --- a/ja/layouts/shortcodes/get-metrics-from-git.html +++ b/ja/layouts/shortcodes/get-metrics-from-git.html @@ -18,6 +18,7 @@ {{ $data := index $.Page.Site.Data.integrations $integration }} +{{ if $data }} {{ if (index $params 1 ) }} {{/* Custom metrics set in shortcode param index 1 (second position) */}} @@ -71,5 +72,6 @@ {{ end }} +{{ end }} {{ end }} \ No newline at end of file diff --git a/layouts/shortcodes/get-metrics-from-git.html b/layouts/shortcodes/get-metrics-from-git.html index 4f6886970fbc6..4aa23607fa380 100644 --- a/layouts/shortcodes/get-metrics-from-git.html +++ b/layouts/shortcodes/get-metrics-from-git.html @@ -29,6 +29,7 @@ {{ end }} {{ $data := ($.Scratch.Get "data") }} + {{ if $data }} {{ if (index $params 1 ) }} {{/* Custom metrics set in shortcode param index 1 (second position) */}} @@ -82,5 +83,6 @@ {{ end }} + {{ end }} -{{ end }} \ No newline at end of file +{{ end }} diff --git a/local/bin/py/placehold_translations.py b/local/bin/py/placehold_translations.py index bcd8aba5a4d50..75890a790f70e 100755 --- a/local/bin/py/placehold_translations.py +++ b/local/bin/py/placehold_translations.py @@ -67,8 +67,7 @@ def create_placeholder_file(template, new_glob): content=new_content.strip()) with open(new_dest, 'w') as o_file: - o_file.write(content) - print("creating placeholder for {0} at {1}".format(template, new_dest)) + o_file.write(content) return new_dest diff --git a/local/bin/py/update_pre_build.py b/local/bin/py/update_pre_build.py index e0f7c176f54d0..9260fcdf37719 100755 --- a/local/bin/py/update_pre_build.py +++ b/local/bin/py/update_pre_build.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import csv import fnmatch import glob @@ -8,42 +9,62 @@ import re import tempfile import shutil +import requests +import yaml +import pickle +from tqdm import * from collections import OrderedDict from functools import partial, wraps from itertools import chain, zip_longest from multiprocessing.pool import ThreadPool as Pool from optparse import OptionParser from os import sep, makedirs, getenv, remove -from os.path import exists, basename, curdir, join, abspath, normpath, dirname -import requests -import yaml -from tqdm import * -import pickle +from os.path import ( + exists, + basename, + curdir, + join, + abspath, + normpath, + dirname, +) + +CONFIGURATION_FILE = "./local/etc/pull_config.yaml" def cache_by_sha(func): """ only downloads fresh file, if we don't have one or we do and the sha has changed """ + @wraps(func) def cached_func(*args, **kwargs): cache = {} list_item = args[1] - dest_dir = kwargs.get('dest_dir') - path_to_file = list_item.get('path', '') - file_out = '{}{}'.format(dest_dir, path_to_file) - p_file_out = '{}{}.pickle'.format(dest_dir, path_to_file) + dest_dir = kwargs.get("dest_dir") + path_to_file = list_item.get("path", "") + file_out = "{}{}".format(dest_dir, path_to_file) + p_file_out = "{}{}.pickle".format( + dest_dir, path_to_file + ) makedirs(dirname(file_out), exist_ok=True) if exists(p_file_out) and exists(file_out): - with open(p_file_out, 'rb') as pf: + with open(p_file_out, "rb") as pf: cache = pickle.load(pf) - cache_sha = cache.get('sha', False) - input_sha = list_item.get('sha', False) - if cache_sha and input_sha and cache_sha == input_sha: + cache_sha = cache.get("sha", False) + input_sha = list_item.get("sha", False) + if ( + cache_sha + and input_sha + and cache_sha == input_sha + ): 
# do nothing as we have the up to date file already return None else: - with open(p_file_out, mode='wb+') as pf: - pickle.dump(list_item, pf, pickle.HIGHEST_PROTOCOL) + with open(p_file_out, mode="wb+") as pf: + pickle.dump( + list_item, pf, pickle.HIGHEST_PROTOCOL + ) return func(*args, **kwargs) + return cached_func @@ -58,39 +79,68 @@ def __exit__(self, *exc): return False def headers(self): - return {'Authorization': 'token {}'.format(self.token)} if self.token else {} + return ( + {"Authorization": "token {}".format(self.token)} + if self.token + else {} + ) def extract(self, data): out = [] - for item in data.get('tree', []): - out.append({'path': item.get('path', ''), 'url': item.get('url', ''), 'type': item.get('type', ''), - 'sha': item.get('sha', '')}) - if item.get('tree', None): - out.append(self.extract(item.get('tree'))) + for item in data.get("tree", []): + out.append( + { + "path": item.get("path", ""), + "url": item.get("url", ""), + "type": item.get("type", ""), + "sha": item.get("sha", ""), + } + ) + if item.get("tree", None): + out.append(self.extract(item.get("tree"))) return out def list(self, org, repo, branch, globs=None): globs = [] if globs is None else globs listing = [] # get the latest sha - url = 'https://api.github.com/repos/{0}/{1}/git/refs/heads/{2}'.format(org, repo, branch) + url = "https://api.github.com/repos/{0}/{1}/git/refs/heads/{2}".format( + org, repo, branch + ) headers = self.headers() - print('Getting latest sha from {}/{}..'.format(repo, branch)) + print( + "Getting latest sha from {}/{}..".format( + repo, branch + ) + ) sha_response = requests.get(url, headers=headers) if sha_response.status_code == requests.codes.ok: - sha = sha_response.json().get('object', {}).get('sha', None) + sha = ( + sha_response.json() + .get("object", {}) + .get("sha", None) + ) if sha: - print('Getting tree from {}/{} @ {}'.format(repo, branch, sha)) + print( + "Getting tree from {}/{} @ {}".format( + repo, branch, sha + ) + ) tree_response = requests.get( - 'https://api.github.com/repos/{0}/{1}/git/trees/{2}?recursive=1'.format(org, repo, sha), - headers=headers) + "https://api.github.com/repos/{0}/{1}/git/trees/{2}?recursive=1".format( + org, repo, sha + ), + headers=headers, + ) if tree_response.status_code == 200: - listing = self.extract(tree_response.json()) + listing = self.extract( + tree_response.json() + ) if globs: filtered_listing = [] for item in listing: - path = item.get('path', '') + path = item.get("path", "") for glob_string in globs: if fnmatch.fnmatch(path, glob_string): filtered_listing.append(item) @@ -99,17 +149,27 @@ def list(self, org, repo, branch, globs=None): return listing @cache_by_sha - def raw(self, list_item, request_session, org, repo, branch, dest_dir): + def raw( + self, + list_item, + request_session, + org, + repo, + branch, + dest_dir, + ): headers = self.headers() - path_to_file = list_item.get('path', '') - file_out = '{}{}'.format(dest_dir, path_to_file) + path_to_file = list_item.get("path", "") + file_out = "{}{}".format(dest_dir, path_to_file) raw_response = request_session.get( - 'https://raw.githubusercontent.com/{0}/{1}/{2}/{3}'.format(org, repo, branch, path_to_file), - headers=headers + "https://raw.githubusercontent.com/{0}/{1}/{2}/{3}".format( + org, repo, branch, path_to_file + ), + headers=headers, ) if raw_response.status_code == requests.codes.ok: makedirs(dirname(file_out), exist_ok=True) - with open(file_out, mode='wb+') as f: + with open(file_out, mode="wb+") as f: f.write(raw_response.content) @@ -117,54 
+177,204 @@ class PreBuild: def __init__(self, opts): super().__init__() self.options = opts - if self.options.dogweb and not self.options.dogweb.endswith(sep): + if ( + self.options.dogweb + and not self.options.dogweb.endswith(sep) + ): self.options.dogweb = self.options.dogweb + sep - if self.options.integrations and not self.options.integrations.endswith(sep): - self.options.integrations = self.options.integrations + sep - if self.options.extras and not self.options.extras.endswith(sep): + if ( + self.options.integrations + and not self.options.integrations.endswith(sep) + ): + self.options.integrations = ( + self.options.integrations + sep + ) + if ( + self.options.extras + and not self.options.extras.endswith(sep) + ): self.options.extras = self.options.extras + sep - self.tempdir = '/tmp' if platform.system() == 'Darwin' else tempfile.gettempdir() - self.data_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'data' + sep) - self.content_dir = '{0}{1}{2}'.format(abspath(normpath(options.source)), sep, 'content' + sep) - self.data_integrations_dir = join(self.data_dir, 'integrations') + sep - self.data_service_checks_dir = join(self.data_dir, 'service_checks') + sep - self.content_integrations_dir = join(self.content_dir, 'integrations') + sep - self.extract_dir = '{0}'.format(join(self.tempdir, "extracted") + sep) - self.integration_datafile = '{0}{1}{2}'.format(abspath(normpath(self.options.source)), sep, "integrations.json") - self.regex_h1 = re.compile(r'^#{1}(?!#)(.*)', re.MULTILINE) - self.regex_h1_replace = re.compile(r'^(#{1})(?!#)(.*)', re.MULTILINE) - self.regex_metrics = re.compile(r'(#{3} Metrics\n)([\s\S]*this integration.|[\s\S]*this check.)([\s\S]*)(#{3} Events\n)', re.DOTALL) - self.regex_service_check = re.compile(r'(#{3} Service Checks\n)([\s\S]*does not include any service checks at this time.)([\s\S]*)(#{2} Troubleshooting\n)', re.DOTALL) - self.regex_fm = re.compile(r'(?:-{3})(.*?)(?:-{3})(.*)', re.DOTALL) - self.regex_source = re.compile(r'(\S*FROM_DISPLAY_NAME\s*=\s*\{)(.*?)\}', re.DOTALL) + self.list_of_contents = [] + self.tempdir = ( + "/tmp" + if platform.system() == "Darwin" + else tempfile.gettempdir() + ) + self.data_dir = "{0}{1}{2}".format( + abspath(normpath(options.source)), + sep, + "data" + sep, + ) + self.content_dir = "{0}{1}{2}".format( + abspath(normpath(options.source)), + sep, + "content" + sep, + ) + self.data_integrations_dir = ( + join(self.data_dir, "integrations") + sep + ) + self.data_service_checks_dir = ( + join(self.data_dir, "service_checks") + sep + ) + self.content_integrations_dir = ( + join(self.content_dir, "integrations") + sep + ) + self.extract_dir = "{0}".format( + join(self.tempdir, "extracted") + sep + ) + self.integration_datafile = "{0}{1}{2}".format( + abspath(normpath(self.options.source)), + sep, + "integrations.json", + ) + self.regex_h1 = re.compile( + r"^#{1}(?!#)(.*)", re.MULTILINE + ) + self.regex_h1_replace = re.compile( + r"^(#{1})(?!#)(.*)", re.MULTILINE + ) + self.regex_metrics = re.compile( + r"(#{3} Metrics\n)([\s\S]*this integration.|[\s\S]*this check.)([\s\S]*)(#{3} Events\n)", + re.DOTALL, + ) + self.regex_service_check = re.compile( + r"(#{3} Service Checks\n)([\s\S]*does not include any service checks at this time.)([\s\S]*)(#{2} Troubleshooting\n)", + re.DOTALL, + ) + self.regex_fm = re.compile( + r"(?:-{3})(.*?)(?:-{3})(.*)", re.DOTALL + ) + self.regex_source = re.compile( + r"(\S*FROM_DISPLAY_NAME\s*=\s*\{)(.*?)\}", + re.DOTALL, + ) self.datafile_json = [] self.pool_size = 5 - 
self.integration_mutations = OrderedDict({ - 'hdfs': {'action': 'create', 'target': 'hdfs', 'remove_header': False, 'fm': {'is_public': True, 'kind': 'integration', 'integration_title': 'Hdfs', 'short_description': 'Track cluster disk usage, volume failures, dead DataNodes, and more.'}}, - 'mesos': {'action': 'create', 'target': 'mesos', 'remove_header': False, 'fm': {'aliases': ['/integrations/mesos_master/','/integrations/mesos_slave/'], 'is_public': True, 'kind': 'integration', 'integration_title': 'Mesos', 'short_description': 'Track cluster resource usage, master and slave counts, tasks statuses, and more.'}}, - 'activemq_xml': {'action': 'merge', 'target': 'activemq', 'remove_header': False}, - 'cassandra_nodetool': {'action': 'merge', 'target': 'cassandra', 'remove_header': False}, - 'datadog_checks_base': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'datadog_checks_tests_helper': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'dev': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'docs': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'gitlab_runner': {'action': 'merge', 'target': 'gitlab', 'remove_header': False}, - 'hdfs_datanode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, - 'hdfs_namenode': {'action': 'merge', 'target': 'hdfs', 'remove_header': False}, - 'mesos_master': {'action': 'merge', 'target': 'mesos', 'remove_header': True}, - 'mesos_slave': {'action': 'merge', 'target': 'mesos', 'remove_header': False}, - 'kafka_consumer': {'action': 'merge', 'target': 'kafka', 'remove_header': False}, - 'kube_dns': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'kube_proxy': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'kubernetes_state': {'action': 'discard', 'target': 'none', 'remove_header': False}, - 'system_core': {'action': 'discard', 'target': 'system', 'remove_header': False}, - 'system_swap': {'action': 'discard', 'target': 'system', 'remove_header': False}, - 'hbase_regionserver': {'action': 'merge', 'target': 'hbase_master', 'remove_header': False}, - }) - self.initial_integration_files = glob.glob('{}*.md'.format(self.content_integrations_dir)) + self.integration_mutations = OrderedDict( + { + "hdfs": { + "action": "create", + "target": "hdfs", + "remove_header": False, + "fm": { + "is_public": True, + "kind": "integration", + "integration_title": "Hdfs", + "short_description": "Track cluster disk usage, volume failures, dead DataNodes, and more.", + }, + }, + "mesos": { + "action": "create", + "target": "mesos", + "remove_header": False, + "fm": { + "aliases": [ + "/integrations/mesos_master/", + "/integrations/mesos_slave/", + ], + "is_public": True, + "kind": "integration", + "integration_title": "Mesos", + "short_description": "Track cluster resource usage, master and slave counts, tasks statuses, and more.", + }, + }, + "activemq_xml": { + "action": "merge", + "target": "activemq", + "remove_header": False, + }, + "cassandra_nodetool": { + "action": "merge", + "target": "cassandra", + "remove_header": False, + }, + "datadog_checks_base": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "datadog_checks_tests_helper": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "docs": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "gitlab_runner": { + "action": "merge", + "target": "gitlab", + "remove_header": False, + }, + "hdfs_datanode": { + "action": 
"merge", + "target": "hdfs", + "remove_header": False, + }, + "hdfs_namenode": { + "action": "merge", + "target": "hdfs", + "remove_header": False, + }, + "mesos_master": { + "action": "merge", + "target": "mesos", + "remove_header": True, + }, + "mesos_slave": { + "action": "merge", + "target": "mesos", + "remove_header": False, + }, + "kafka_consumer": { + "action": "merge", + "target": "kafka", + "remove_header": False, + }, + "kube_dns": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "kube_proxy": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "kubernetes_state": { + "action": "discard", + "target": "none", + "remove_header": False, + }, + "system_core": { + "action": "discard", + "target": "system", + "remove_header": False, + }, + "system_swap": { + "action": "discard", + "target": "system", + "remove_header": False, + }, + "hbase_regionserver": { + "action": "merge", + "target": "hbase_master", + "remove_header": False, + }, + } + ) + self.initial_integration_files = glob.glob( + "{}*.md".format(self.content_integrations_dir) + ) makedirs(self.data_integrations_dir, exist_ok=True) - makedirs(self.data_service_checks_dir, exist_ok=True) - makedirs(self.content_integrations_dir, exist_ok=True) + makedirs( + self.data_service_checks_dir, exist_ok=True + ) + makedirs( + self.content_integrations_dir, exist_ok=True + ) @staticmethod def csv_to_yaml(key_name, csv_filename, yml_filename): @@ -177,11 +387,21 @@ def csv_to_yaml(key_name, csv_filename, yml_filename): """ yaml_data = {key_name: []} with open(csv_filename) as csv_file: - reader = csv.DictReader(csv_file, delimiter=',') - yaml_data[key_name] = [dict(line) for line in reader] + reader = csv.DictReader(csv_file, delimiter=",") + yaml_data[key_name] = [ + dict(line) for line in reader + ] if yaml_data[key_name]: - with open(file=yml_filename, mode='w', encoding='utf-8') as f: - f.write(yaml.dump(yaml_data, default_flow_style=False)) + with open( + file=yml_filename, + mode="w", + encoding="utf-8", + ) as f: + f.write( + yaml.dump( + yaml_data, default_flow_style=False + ) + ) def download_from_repo(self, org, repo, branch, globs): """ @@ -194,172 +414,440 @@ def download_from_repo(self, org, repo, branch, globs): """ with GitHub(self.options.token) as gh: listing = gh.list(org, repo, branch, globs) - dest = '{0}{1}{2}'.format(self.extract_dir, repo, sep) + dest = "{0}{1}{2}".format( + self.extract_dir, repo, sep + ) with Pool(processes=self.pool_size) as pool: with requests.Session() as s: - r = [x for x in tqdm( - pool.imap_unordered(partial(gh.raw, request_session=s, org=org, repo=repo, branch=branch, dest_dir=dest), listing))] + r = [ + x + for x in tqdm( + pool.imap_unordered( + partial( + gh.raw, + request_session=s, + org=org, + repo=repo, + branch=branch, + dest_dir=dest, + ), + listing, + ) + ) + ] def process(self): """ - 1. If we did not specify local dogweb directory and there is a token download dogweb repo files we need - 2. If we did not specify local integrations-core directory download with or without token as its public repo - 3. 
Process all files we have dogweb first integrations-core second with the latter taking precedence
+        This represents the overall workflow of the documentation build
         """
-        print('Processing')
-
-        dogweb_globs = ['integration/**/*_metadata.csv', 'integration/**/manifest.json',
-                        'integration/**/service_checks.json', 'integration/**/README.md',
-                        'dd/utils/context/source.py']
-        integrations_globs = ['*[!}]/metadata.csv', '*[!}]/manifest.json', '*[!}]/service_checks.json', '*[!}]/README.md', 'docs/**']
-        extras_globs = ['**/metadata.csv', '**/manifest.json', '**/service_checks.json', '**/README.md']
-
-        # sync from dogweb, download if we don't have it (token required)
-        if not self.options.dogweb:
-            if self.options.token:
-                self.download_from_repo('DataDog', 'dogweb', 'prod', dogweb_globs)
-                self.options.dogweb = '{0}{1}{2}'.format(self.extract_dir, 'dogweb', sep)
-
-        # sync from integrations-core, download if we don't have it (public repo so no token needed)
-        if not options.integrations:
-            self.download_from_repo('DataDog', 'integrations-core', 'master', integrations_globs)
-            self.options.integrations = '{0}{1}{2}'.format(self.extract_dir, 'integrations-core', sep)
-
-        # sync from integrations-extras, download if we don't have it (public repo so no token needed)
-        if not options.extras:
-            self.download_from_repo('DataDog', 'integrations-extras', 'master', extras_globs)
-            self.options.extras = '{0}{1}{2}'.format(self.extract_dir, 'integrations-extras', sep)
-
-        globs = []
-
-        for d_glob, i_glob, e_glob in zip_longest(dogweb_globs, integrations_globs, extras_globs):
-            if d_glob:
-                globs.append('{}{}'.format(self.options.dogweb, d_glob))
-            if i_glob:
-                globs.append('{}{}'.format(self.options.integrations, i_glob))
-            if e_glob:
-                globs.append('{}{}'.format(self.options.extras, e_glob))
-
-        for file_name in tqdm(chain.from_iterable(glob.iglob(pattern, recursive=True) for pattern in globs)):
-            self.process_source_attribute(file_name)
-            self.process_integration_metric(file_name)
-            self.process_integration_manifest(file_name)
-            self.process_service_checks(file_name)
-            self.process_integration_readme(file_name)
-            self.dev_doc_integrations_core(file_name)
+        print("Processing")
+
+        self.extract_config()
+
+        self.local_or_upstream()
+
+        self.process_filenames()

         self.merge_integrations()

+    def extract_config(self):
+        """
+        This pulls the content from the configuration file at the CONFIGURATION_FILE location,
+        then parses it to populate the list_of_contents variable, which contains all content
+        that needs to be pulled and processed.
+        """
+        print(
+            "Loading {} configuration file".format(
+                CONFIGURATION_FILE
+            )
+        )
+        configuration = yaml.load(open(CONFIGURATION_FILE))
+        for org in configuration:
+            for repo in org["repos"]:
+                for content in repo["contents"]:
+                    content_temp = {}
+                    content_temp["org_name"] = org[
+                        "org_name"
+                    ]
+                    content_temp["repo_name"] = repo[
+                        "repo_name"
+                    ]
+                    content_temp["branch"] = content[
+                        "branch"
+                    ]
+                    content_temp["action"] = content[
+                        "action"
+                    ]
+                    content_temp["globs"] = content["globs"]
+
+                    if content["action"] == "pull-and-push":
+                        content_temp["options"] = content[
+                            "options"
+                        ]
+
+                    self.list_of_contents.append(
+                        content_temp
+                    )
+                    print(
+                        "Adding content {}".format(
+                            content_temp
+                        )
+                    )
+
+    def local_or_upstream(self):
+        """
+        This goes through the list_of_contents and checks, for each repo specified,
+        whether a local version exists; otherwise the content is downloaded from the upstream repo on GitHub.
+        """
+        for content in self.list_of_contents:
+            if content["repo_name"] == "dogweb":
+                if not self.options.dogweb:
+                    if self.options.token:
+                        print(
+                            "No local version of {} found, downloading content from upstream version".format(
+                                content["repo_name"]
+                            )
+                        )
+                        self.download_from_repo(
+                            content["org_name"],
+                            content["repo_name"],
+                            content["branch"],
+                            content["globs"],
+                        )
+
+            elif (
+                content["repo_name"] == "integrations-core"
+            ):
+                if not self.options.integrations:
+                    print(
+                        "No local version of {} found, downloading content from upstream version".format(
+                            content["repo_name"]
+                        )
+                    )
+                    self.download_from_repo(
+                        content["org_name"],
+                        content["repo_name"],
+                        content["branch"],
+                        content["globs"],
+                    )
+
+            elif (
+                content["repo_name"]
+                == "integrations-extras"
+            ):
+                if not self.options.extras:
+                    print(
+                        "No local version of {} found, downloading content from upstream version".format(
+                            content["repo_name"]
+                        )
+                    )
+                    self.download_from_repo(
+                        content["org_name"],
+                        content["repo_name"],
+                        content["branch"],
+                        content["globs"],
+                    )
+
+            else:
+                print(
+                    "No local version of {} found, downloading content from upstream version".format(
+                        content["repo_name"]
+                    )
+                )
+                self.download_from_repo(
+                    content["org_name"],
+                    content["repo_name"],
+                    content["branch"],
+                    content["globs"],
+                )
+
+            print(
+                "Updating globs for new local version of repo {}".format(
+                    content["repo_name"]
+                )
+            )
+            content["globs"] = self.update_globs(
+                "{0}{1}{2}".format(
+                    self.extract_dir,
+                    content["repo_name"],
+                    sep,
+                ),
+                content["globs"],
+            )
+
+    def update_globs(self, new_path, globs):
+        """
+        Depending on whether the repo is local or was downloaded, the globs need to be updated
+        to match the final location of the repo to use.
+        :param new_path: new path to update the globs with
+        :param globs: list of globs to update
+        """
+        new_globs = []
+        for item in globs:
+            new_globs.append("{}{}".format(new_path, item))
+
+        return new_globs
+
+    def process_filenames(self):
+        """
+        Goes through the list_of_contents and triggers
+        the right action to apply for each content.
+        """
+        for content in self.list_of_contents:
+            print("Processing content: {}".format(content))
+            if content["action"] == "integrations":
+                self.process_integrations(content)
+
+            elif content["action"] == "source":
+
+                self.process_source_attribute(content)
+
+            elif content["action"] == "pull-and-push":
+
+                self.pull_and_push(content)
+            else:
+                print(
+                    "[ERROR] Unsuccessful processing of {}".format(
+                        content
+                    )
+                )
+
+    def process_integrations(self, content):
+        """
+        Goes through all files needed for the integrations build
+        and triggers the right function for each type of file.
+        :param content: integrations content to process
+        """
+        for file_name in tqdm(
+            chain.from_iterable(
+                glob.iglob(pattern, recursive=True)
+                for pattern in content["globs"]
+            )
+        ):
+            if file_name.endswith(".csv"):
+                self.process_integration_metric(file_name)
+
+            elif file_name.endswith("manifest.json"):
+                self.process_integration_manifest(file_name)
+
+            elif file_name.endswith("service_checks.json"):
+                self.process_service_checks(file_name)
+
+            elif file_name.endswith(".md"):
+                self.process_integration_readme(file_name)
+
+    def pull_and_push(self, content):
+        """
+        Takes the content from a folder following GitHub logic
+        and transforms it to be displayed in the docs under the dest_dir folder
+        :param content: content to process
+        """
+
+        for file_name in tqdm(
+            chain.from_iterable(
+                glob.iglob(pattern, recursive=True)
+                for pattern in content["globs"]
+            )
+        ):
+            with open(file_name, mode="r+") as f:
+                file_content = f.read()
+
+                # Replacing the master README.md with _index.md to follow Hugo logic
+                if file_name.endswith("README.md"):
+                    file_name = "_index.md"
+
+                # Replacing links that point to the GitHub folder with links that point to the docs.
+                new_link = (
+                    content["options"]["dest_dir"] + "\\2"
+                )
+                regex_github_link = re.compile(
+                    r"(https:\/\/github\.com\/{}\/{}\/blob\/{}\/{})(\S+)\.md".format(
+                        content["org_name"],
+                        content["repo_name"],
+                        content["branch"],
+                        content["options"][
+                            "path_to_remove"
+                        ],
+                    )
+                )
+                file_content = re.sub(
+                    regex_github_link,
+                    new_link,
+                    file_content,
+                    count=0,
+                )
+
+                # Writing the new content to the documentation file
+                dirp = "{}{}".format(
+                    self.content_dir,
+                    content["options"]["dest_dir"][1:],
+                )
+                makedirs(dirp, exist_ok=True)
+                with open(
+                    "{}{}".format(dirp, basename(file_name)),
+                    mode="w+",
+                    encoding="utf-8",
+                ) as f:
+                    f.write(file_content)
+
     def merge_integrations(self):
         """
         Merges integrations that come under one
         """
-        for name, action_obj in self.integration_mutations.items():
+        for (
+            name,
+            action_obj,
+        ) in self.integration_mutations.items():
             if name not in self.initial_integration_files:
-                action = action_obj.get('action')
-                target = action_obj.get('target')
-                input_file = '{}{}.md'.format(self.content_integrations_dir, name)
-                output_file = '{}{}.md'.format(self.content_integrations_dir, target)
-                if action == 'merge':
-                    with open(input_file, 'r') as content_file, open(output_file, 'a') as target_file:
+                action = action_obj.get("action")
+                target = action_obj.get("target")
+                input_file = "{}{}.md".format(
+                    self.content_integrations_dir, name
+                )
+                output_file = "{}{}.md".format(
+                    self.content_integrations_dir, target
+                )
+                if action == "merge":
+                    with open(
+                        input_file, "r"
+                    ) as content_file, open(
+                        output_file, "a"
+                    ) as target_file:
                         content = content_file.read()
-                        content = re.sub(self.regex_fm, r'\2', content, count=0)
-                        if action_obj.get('remove_header', False):
-                            content = re.sub(self.regex_h1, '', content, count=0)
+                        content = 
re.sub( + self.regex_fm, + r"\2", + content, + count=0, + ) + if action_obj.get( + "remove_header", False + ): + content = re.sub( + self.regex_h1, + "", + content, + count=0, + ) else: - content = re.sub(self.regex_h1_replace, r'##\2', content, count=0) + content = re.sub( + self.regex_h1_replace, + r"##\2", + content, + count=0, + ) target_file.write(content) try: remove(input_file) except OSError: - print('the file {} was not found and could not be removed during merge action'.format(input_file)) - elif action == 'truncate': + print( + "the file {} was not found and could not be removed during merge action".format( + input_file + ) + ) + elif action == "truncate": if exists(output_file): - with open(output_file, 'r+') as target_file: + with open( + output_file, "r+" + ) as target_file: content = target_file.read() - content = re.sub(self.regex_fm, r'---\n\1\n---\n', content, count=0) + content = re.sub( + self.regex_fm, + r"---\n\1\n---\n", + content, + count=0, + ) target_file.truncate(0) target_file.seek(0) target_file.write(content) else: - open(output_file, 'w').close() - elif action == 'discard': + open(output_file, "w").close() + elif action == "discard": try: remove(input_file) except OSError: - print('the file {} was not found and could not be removed during discard action'.format(input_file)) - elif action == 'create': - with open(output_file, 'w+') as f: - fm = yaml.dump(action_obj.get('fm'), default_flow_style=False).rstrip() - data = '---\n{0}\n---\n'.format(fm) + print( + "the file {} was not found and could not be removed during discard action".format( + input_file + ) + ) + elif action == "create": + with open(output_file, "w+") as f: + fm = yaml.dump( + action_obj.get("fm"), + default_flow_style=False, + ).rstrip() + data = "---\n{0}\n---\n".format(fm) f.write(data) - def process_source_attribute(self, file_name): + def process_source_attribute(self, content): """ Take a single source.py file extracts the FROM_DISPLAY_NAME dict values and inserts them into the file something.md :param file_name: path to a source.py file """ - if file_name.endswith('dd/utils/context/source.py'): - out = '|Integration name | API source attribute|\n' - out += '|:---|:---|\n' - with open(file_name, 'r') as f: - result = f.read() - m = re.search(self.regex_source, result) - result = m.group(2) if m else result - result = re.sub(r'[^0-9A-Za-z:, ]', '', result) - for line in result.split(','): - pair = line.split(':') - if len(pair) > 1: - out += '|{0}|{1}|\n'.format(pair[0].strip().title(), pair[1].strip()) - with open('{}{}'.format(self.options.source, '/content/integrations/faq/list-of-api-source-attribute-value.md'), mode='r+', encoding='utf-8') as f: - boundary = re.compile(r'^-{3,}$', re.MULTILINE) - _, fm, content = boundary.split(f.read(), 2) - template = "---\n{front_matter}\n---\n\n{content}\n" - new_content = template.format(front_matter=fm.strip(), content=out) - f.truncate(0) - f.seek(0) - f.write(new_content) + for file_name in tqdm( + chain.from_iterable( + glob.iglob(pattern, recursive=True) + for pattern in content["globs"] + ) + ): + if file_name.endswith( + "dd/utils/context/source.py" + ): + out = "|Integration name | API source attribute|\n" + out += "|:---|:---|\n" + with open(file_name, "r") as f: + result = f.read() + m = re.search(self.regex_source, result) + result = m.group(2) if m else result + result = re.sub( + r"[^0-9A-Za-z:, ]", "", result + ) + for line in result.split(","): + pair = line.split(":") + if len(pair) > 1: + out += "|{0}|{1}|\n".format( + 
pair[0].strip().title(), + pair[1].strip(), + ) + with open( + "{}{}".format( + self.options.source, + "/content/integrations/faq/list-of-api-source-attribute-value.md", + ), + mode="r+", + encoding="utf-8", + ) as f: + boundary = re.compile( + r"^-{3,}$", re.MULTILINE + ) + _, fm, content = boundary.split( + f.read(), 2 + ) + template = "---\n{front_matter}\n---\n\n{content}\n" + new_content = template.format( + front_matter=fm.strip(), content=out + ) + f.truncate(0) + f.seek(0) + f.write(new_content) def process_integration_metric(self, file_name): """ Take a single metadata csv file and convert it to yaml :param file_name: path to a metadata csv file """ - if file_name.endswith('.csv'): - if file_name.endswith('/metadata.csv'): - key_name = basename(dirname(normpath(file_name))) - else: - key_name = basename(file_name.replace('_metadata.csv', '')) - new_file_name = '{}{}.yaml'.format(self.data_integrations_dir, key_name) - self.csv_to_yaml(key_name, file_name, new_file_name) - - def dev_doc_integrations_core(self, file_name): - """ - Take the content from https://github.com/DataDog/integrations-core/tree/master/docs/dev - and transform it to be displayed on the doc in the /developers/integrations section - :param file_name: path to a file - """ - relative_path_on_github = '/integrations-core/docs/dev/' - doc_directory = '/developers/integrations/' - - if (relative_path_on_github in file_name and file_name.endswith('.md')): - - with open(file_name, mode='r+') as f: - content = f.read() - - # Replacing the master README.md by _index.md to follow Hugo logic - if file_name.endswith('README.md'): - file_name = '_index.md' - - #Replacing links that point to the Github folder by link that point to the doc. - new_link = doc_directory +'\\2' - regex_github_link = re.compile(r'(https:\/\/github\.com\/DataDog\/integrations-core\/blob\/master\/docs\/dev\/)(\S+)\.md') - content = re.sub(regex_github_link, new_link, content, count=0) - - # Writing the new content to the documentation file - dirp = '{}{}'.format(self.content_dir, doc_directory[1:]) - makedirs(dirp, exist_ok=True) - with open('{}{}'.format(dirp, basename(file_name)), mode='w+', encoding='utf-8') as f: - f.write(content) + if file_name.endswith("/metadata.csv"): + key_name = basename( + dirname(normpath(file_name)) + ) + else: + key_name = basename( + file_name.replace("_metadata.csv", "") + ) + new_file_name = "{}{}.yaml".format( + self.data_integrations_dir, key_name + ) + self.csv_to_yaml(key_name, file_name, new_file_name) def process_integration_manifest(self, file_name): """ @@ -367,19 +855,32 @@ def process_integration_manifest(self, file_name): set is_public to false to hide integrations we merge later :param file_name: path to a manifest json file """ - if file_name.endswith('manifest.json'): - names = [d.get('name', '').lower() for d in self.datafile_json if 'name' in d] - with open(file_name) as f: - data = json.load(f) - data_name = data.get('name', '').lower() - if data_name in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - data['is_public'] = False - if data_name in names: - item = [d for d in self.datafile_json if d.get('name', '').lower() == data_name] - if len(item) > 0: - item[0].update(data) - else: - self.datafile_json.append(data) + + names = [ + d.get("name", "").lower() + for d in self.datafile_json + if "name" in d + ] + with open(file_name) as f: + data = json.load(f) + data_name = data.get("name", "").lower() + if data_name in [ + k + for k, v in 
self.integration_mutations.items() + if v.get("action") == "merge" + ]: + data["is_public"] = False + if data_name in names: + item = [ + d + for d in self.datafile_json + if d.get("name", "").lower() + == data_name + ] + if len(item) > 0: + item[0].update(data) + else: + self.datafile_json.append(data) def process_service_checks(self, file_name): """ @@ -387,9 +888,13 @@ def process_service_checks(self, file_name): as the integration name it came from e.g /data/service_checks/docker.json :param file_name: path to a service_checks json file """ - if file_name.endswith('service_checks.json'): - new_file_name = '{}.json'.format(basename(dirname(normpath(file_name)))) - shutil.copy(file_name, self.data_service_checks_dir + new_file_name) + new_file_name = "{}.json".format( + basename(dirname(normpath(file_name))) + ) + shutil.copy( + file_name, + self.data_service_checks_dir + new_file_name, + ) def process_integration_readme(self, file_name): """ @@ -401,38 +906,91 @@ def process_integration_readme(self, file_name): 5. write out file to content/integrations with filename changed to integrationname.md :param file_name: path to a readme md file """ - if file_name.endswith('.md'): - dependencies = [] - if file_name.startswith(self.options.integrations): - dependencies.append(file_name.replace(self.options.integrations, "https://github.com/DataDog/integrations-core/blob/master/")) - elif file_name.startswith(self.options.extras): - dependencies.append(file_name.replace(self.options.extras, "https://github.com/DataDog/integrations-extras/blob/master/")) - metrics = glob.glob('{path}{sep}*metadata.csv'.format(path=dirname(file_name), sep=sep)) - metrics = metrics[0] if len(metrics) > 0 else None - metrics_exist = metrics and exists(metrics) and linecache.getline(metrics, 2) - service_check = glob.glob('{file}.json'.format(file=self.data_service_checks_dir + basename(dirname(file_name)))) - service_check = service_check[0] if len(service_check) > 0 else None - service_check_exist = service_check and exists(service_check) - manifest = '{0}{1}{2}'.format(dirname(file_name), sep, 'manifest.json') - manifest_json = json.load(open(manifest)) if exists(manifest) else {} - new_file_name = '{}.md'.format(basename(dirname(file_name))) - exist_already = exists(self.content_integrations_dir + new_file_name) - with open(file_name, 'r') as f: - result = f.read() - title = manifest_json.get('name', '').lower() - if title not in [k for k, v in self.integration_mutations.items() if v.get('action') == 'merge']: - result = re.sub(self.regex_h1, '', result, 1) - if metrics_exist: - result = re.sub(self.regex_metrics, r'\1{{< get-metrics-from-git "%s" >}}\n\3\4'%format(title), result, 0) - if service_check_exist: - result = re.sub(self.regex_service_check, r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' % format(title), result, 0) - result = "{0}\n\n{1}".format(result, '{{< get-dependencies >}}') - result = self.add_integration_frontmatter(new_file_name, result, dependencies) - if not exist_already: - with open(self.content_integrations_dir + new_file_name, 'w') as out: - out.write(result) - - def add_integration_frontmatter(self, file_name, content, dependencies=[]): + + metrics = glob.glob( + "{path}{sep}*metadata.csv".format( + path=dirname(file_name), sep=sep + ) + ) + metrics = metrics[0] if len(metrics) > 0 else None + metrics_exist = ( + metrics + and exists(metrics) + and linecache.getline(metrics, 2) + ) + service_check = glob.glob( + "{file}.json".format( + file=self.data_service_checks_dir + + 
basename(dirname(file_name)) + ) + ) + service_check = ( + service_check[0] + if len(service_check) > 0 + else None + ) + service_check_exist = service_check and exists( + service_check + ) + manifest = "{0}{1}{2}".format( + dirname(file_name), sep, "manifest.json" + ) + manifest_json = ( + json.load(open(manifest)) + if exists(manifest) + else {} + ) + dependencies = self.add_dependencies(file_name) + new_file_name = "{}.md".format( + basename(dirname(file_name)) + ) + exist_already = exists( + self.content_integrations_dir + new_file_name + ) + with open(file_name, "r") as f: + result = f.read() + title = manifest_json.get("name", "").lower() + if title not in [ + k + for k, v in self.integration_mutations.items() + if v.get("action") == "merge" + ]: + result = re.sub( + self.regex_h1, "", result, 1 + ) + if metrics_exist: + result = re.sub( + self.regex_metrics, + r'\1{{< get-metrics-from-git "%s" >}}\n\3\4' + % format(title), + result, + 0, + ) + if service_check_exist: + result = re.sub( + self.regex_service_check, + r'\1{{< get-service-checks-from-git "%s" >}}\n\3\4' + % format(title), + result, + 0, + ) + result = "{0}\n\n{1}".format( + result, "{{< get-dependencies >}}" + ) + result = self.add_integration_frontmatter( + new_file_name, result, dependencies + ) + if not exist_already: + with open( + self.content_integrations_dir + + new_file_name, + "w", + ) as out: + out.write(result) + + def add_integration_frontmatter( + self, file_name, content, dependencies=[] + ): """ Takes an integration README.md and injects front matter yaml based on manifest.json data of the same integration :param file_name: new integration markdown filename e.g airbrake.md @@ -442,32 +1000,115 @@ def add_integration_frontmatter(self, file_name, content, dependencies=[]): fm = {} template = "---\n{front_matter}\n---\n\n{content}\n" if file_name not in self.initial_integration_files: - item = [d for d in self.datafile_json if d.get('name', '').lower() == basename(file_name).replace('.md', '')] + item = [ + d + for d in self.datafile_json + if d.get("name", "").lower() + == basename(file_name).replace(".md", "") + ] if item and len(item) > 0: - item[0]['kind'] = 'integration' - item[0]['integration_title'] = item[0].get('public_title', '').replace('Datadog-', '').replace( - 'Integration', '').strip() - item[0]['git_integration_title'] = item[0].get('name', '').lower() - if item[0].get('type', None): - item[0]['ddtype'] = item[0].get('type') - del item[0]['type'] - item[0]['dependencies'] = dependencies - fm = yaml.dump(item[0], default_flow_style=False).rstrip() + item[0]["kind"] = "integration" + item[0]["integration_title"] = ( + item[0] + .get("public_title", "") + .replace("Datadog-", "") + .replace("Integration", "") + .strip() + ) + item[0]["git_integration_title"] = ( + item[0].get("name", "").lower() + ) + if item[0].get("type", None): + item[0]["ddtype"] = item[0].get("type") + del item[0]["type"] + item[0]["dependencies"] = dependencies + fm = yaml.dump( + item[0], default_flow_style=False + ).rstrip() else: - fm = {'kind': 'integration'} - return template.format(front_matter=fm, content=content) + fm = {"kind": "integration"} + return template.format( + front_matter=fm, content=content + ) + + def add_dependencies(self, file_name): + dependencies = [] + if file_name.startswith( + "{0}{1}{2}".format( + self.extract_dir, "integrations-core", sep + ) + ): + dependencies.append( + file_name.replace( + "{0}{1}{2}".format( + self.extract_dir, + "integrations-core", + sep, + ), + 
"https://github.com/DataDog/integrations-core/blob/master/", + ) + ) + + elif file_name.startswith( + "{0}{1}{2}".format( + self.extract_dir, "integrations-extras", sep + ) + ): + dependencies.append( + file_name.replace( + "{0}{1}{2}".format( + self.extract_dir, + "integrations-extras", + sep, + ), + "https://github.com/DataDog/integrations-extras/blob/master/", + ) + ) + + return dependencies -if __name__ == '__main__': - parser = OptionParser(usage="usage: %prog [options] link_type") - parser.add_option("-t", "--token", help="github access token", default=None) - parser.add_option("-w", "--dogweb", help="path to dogweb local folder", default=None) - parser.add_option("-i", "--integrations", help="path to integrations-core local folder", default=None) - parser.add_option("-e", "--extras", help="path to integrations-extras local folder", default=None) - parser.add_option("-s", "--source", help="location of src files", default=curdir) +if __name__ == "__main__": + parser = OptionParser( + usage="usage: %prog [options] link_type" + ) + parser.add_option( + "-t", + "--token", + help="github access token", + default=None, + ) + parser.add_option( + "-w", + "--dogweb", + help="path to dogweb local folder", + default=None, + ) + parser.add_option( + "-i", + "--integrations", + help="path to integrations-core local folder", + default=None, + ) + parser.add_option( + "-e", + "--extras", + help="path to integrations-extras local folder", + default=None, + ) + parser.add_option( + "-s", + "--source", + help="location of src files", + default=curdir, + ) options, args = parser.parse_args() - options.token = getenv('GITHUB_TOKEN', options.token) if not options.token else options.token + options.token = ( + getenv("GITHUB_TOKEN", options.token) + if not options.token + else options.token + ) pre = PreBuild(options) pre.process() diff --git a/local/etc/pull_config.yaml b/local/etc/pull_config.yaml new file mode 100644 index 0000000000000..829c485472749 --- /dev/null +++ b/local/etc/pull_config.yaml @@ -0,0 +1,50 @@ +--- +- org_name: DataDog + + repos: + - repo_name: dogweb + + contents: + + - action: source + branch: prod + globs: + - dd/utils/context/source.py + + - action: integrations + branch: prod + globs: + - integration/**/*_metadata.csv + - integration/**/manifest.json + - integration/**/service_checks.json + - integration/**/README.md + + - repo_name: integrations-core + contents: + + - action: integrations + branch: master + globs: + - "*[!}]/metadata.csv" + - "*[!}]/manifest.json" + - "*[!}]/service_checks.json" + - "*[!}]/README.md" + + - action: pull-and-push + branch: master + globs: + - docs/dev/*.md + options: + dest_dir: '/developers/integrations/' + path_to_remove: 'docs/dev/' + + - repo_name: integrations-extras + contents: + + - action: integrations + branch: master + globs: + - "**/metadata.csv" + - "**/manifest.json" + - "**/service_checks.json" + - "**/README.md" diff --git a/local/etc/requirements3.txt b/local/etc/requirements3.txt index eeac424cda5ae..41ad3d6dff076 100644 --- a/local/etc/requirements3.txt +++ b/local/etc/requirements3.txt @@ -9,7 +9,7 @@ htmlmin>=0.1.10 pycparser==2.14 awscli==1.16.82 requests==2.20.0 -PyYAML==3.13 +PyYAML==3.13 tqdm==4.14.0 Pygments==2.2.0 datadog==0.16.0