Reviewed revision of the patch that validates extra third-party license files.
The added code differs from the originally submitted patch, so the post-image
blob ids on the `index` lines below are the ones of the original submission.

diff --git a/datadog_checks_dev/datadog_checks/dev/tooling/commands/validate/licenses.py b/datadog_checks_dev/datadog_checks/dev/tooling/commands/validate/licenses.py
index 864f2bc65fc92..ed7e6883d0fdc 100644
--- a/datadog_checks_dev/datadog_checks/dev/tooling/commands/validate/licenses.py
+++ b/datadog_checks_dev/datadog_checks/dev/tooling/commands/validate/licenses.py
@@ -13,6 +13,7 @@
 
 from ....fs import file_exists, read_file_lines, write_file_lines
 from ...constants import get_agent_requirements, get_license_attribution_file
+from ...utils import get_extra_license_files, read_license_file_rows
 from ..console import CONTEXT_SETTINGS, abort, echo_failure, echo_info, echo_success
 
 EXPLICIT_LICENSES = {
@@ -102,6 +103,17 @@
     'Zope Public License': 'ZPL-2.1',
 }
 
+EXTRA_LICENSES = {'BSD-2-Clause'}
+
+VALID_LICENSES = (
+    EXTRA_LICENSES
+    | set(KNOWN_LICENSES.values())
+    | set(CLASSIFIER_TO_HIGHEST_SPDX.values())
+    | set(KNOWN_CLASSIFIERS.values())
+)
+
+HEADERS = ['Component', 'Origin', 'License', 'Copyright']
+
 
 def format_attribution_line(package_name, license_id, package_copyright):
     package_copyright = ' | '.join(sorted(package_copyright))
@@ -157,6 +169,61 @@ async def scrape_license_data(urls):
     return package_data
 
 
+def validate_extra_licenses():
+    """
+    Validate the extra third-party license files.
+
+    An integration may use code from an outside source or origin that is not PyPI -
+    it will have a file in its check directory titled `LICENSE-3rdparty-extra.csv`.
+
+    Every row of every such file must have as many columns as its header, exactly
+    the `HEADERS` columns and a license listed in `VALID_LICENSES`; each violation
+    is reported with `echo_failure`.
+
+    Returns a `(lines, any_errors)` tuple: `lines` is the list of the original text
+    of every valid row and `any_errors` tells whether any row was rejected.
+    """
+    lines = []
+    any_errors = False
+    expected_headers = set(HEADERS)
+
+    for license_file in get_extra_license_files():
+        for line_no, row, line in read_license_file_rows(license_file):
+            # `csv.DictReader` fills missing columns with `None` values and stores
+            # surplus values under a `None` key, either way the number of columns is wrong
+            # https://docs.python.org/3.8/library/csv.html#csv.DictReader
+            if None in row or None in row.values():
+                any_errors = True
+                echo_failure(f"{license_file}:{line_no} Has the wrong amount of columns")
+                continue
+
+            # all headers exist, no invalid headers
+            row_headers = set(row)
+            if row_headers != expected_headers:
+                invalid_headers = row_headers - expected_headers
+                if invalid_headers:
+                    echo_failure(f'{license_file}:{line_no} Invalid column {invalid_headers}')
+
+                missing_headers = expected_headers - row_headers
+                if missing_headers:
+                    echo_failure(f'{license_file}:{line_no} Missing columns {missing_headers}')
+
+                any_errors = True
+                continue
+
+            license_type = row['License']
+            if license_type not in VALID_LICENSES:
+                any_errors = True
+                echo_failure(f'{license_file}:{line_no} Invalid license type {license_type}')
+                continue
+
+            # A valid row is always kept: whether it is written must not depend on
+            # the rows that precede it in the same file
+            lines.append(line)
+
+    return lines, any_errors
+
+
 @click.command(context_settings=CONTEXT_SETTINGS, short_help='Validate third-party license list')
 @click.option('--sync', '-s', is_flag=True, help='Generate the `LICENSE-3rdparty.csv` file')
 @click.pass_context
@@ -184,7 +251,9 @@ def licenses(ctx, sync):
 
     package_license_errors = defaultdict(list)
 
-    lines = ['Component,Origin,License,Copyright\n']
+    header_line = "{}\n".format(','.join(HEADERS))
+
+    lines = [header_line]
     for package_name, data in sorted(package_data.items()):
         if package_name in EXPLICIT_LICENSES:
             for license_id in sorted(EXPLICIT_LICENSES[package_name]):
@@ -238,9 +307,17 @@ def licenses(ctx, sync):
         abort()
 
+    extra_licenses_lines, any_errors = validate_extra_licenses()
+    lines.extend(extra_licenses_lines)
+
     lines.sort()
     license_attribution_file = get_license_attribution_file()
     if sync:
         write_file_lines(license_attribution_file, lines)
-        echo_success('Success!')
+        if any_errors:
+            abort('Failed to write all extra licenses. Please fix any reported errors')
+        else:
+            echo_success('Success!')
     elif read_file_lines(license_attribution_file) != lines:
         abort('Out of sync, run again with the --sync flag')
+    elif any_errors:
+        abort()
diff --git a/datadog_checks_dev/datadog_checks/dev/tooling/utils.py b/datadog_checks_dev/datadog_checks/dev/tooling/utils.py
index 4c6e8ded9f7f7..aa9e0e2686c30 100644
--- a/datadog_checks_dev/datadog_checks/dev/tooling/utils.py
+++ b/datadog_checks_dev/datadog_checks/dev/tooling/utils.py
@@ -218,6 +218,22 @@ def get_tox_file(check_name):
     return os.path.join(get_root(), check_name, 'tox.ini')
 
 
+def get_extra_license_files():
+    """
+    Yield the path of every `LICENSE-3rdparty-extra.csv` file that exists in an entry
+    of the `get_root()` directory for which a manifest file exists too.
+    """
+    root = get_root()
+    # `os.listdir` returns entries in arbitrary order, sort them so that the files
+    # are always visited in the same order
+    for path in sorted(os.listdir(root)):
+        if not file_exists(get_manifest_file(path)):
+            continue
+        extra_license_file = os.path.join(root, path, 'LICENSE-3rdparty-extra.csv')
+        if file_exists(extra_license_file):
+            yield extra_license_file
+
+
 def get_metadata_file(check_name):
     path = load_manifest(check_name).get('assets', {}).get("metrics_metadata", "metadata.csv")
     return os.path.join(get_root(), check_name, path)
@@ -403,6 +419,42 @@ def read_metadata_rows(metadata_file):
             yield line_no, row
 
 
+def read_license_file_rows(license_file):
+    """
+    Iterate over the rows of a `LICENSE-3rdparty-extra.csv` or `LICENSE-3rdparty.csv` file.
+
+    Yields a `(line_no, row, line)` tuple for every non-blank record that follows the
+    header: `line_no` is the 1-based number of the physical line the record starts
+    on, `row` is the record as parsed by `csv.DictReader` and `line` is the original
+    text of the record, always terminated by a newline.
+    """
+    with io.open(license_file, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    reader = csv.DictReader(lines, delimiter=',')
+
+    # Reading `fieldnames` consumes the header; an empty file has none and no rows either
+    if reader.fieldnames is None:
+        return
+
+    consumed = reader.reader.line_num
+    for row in reader:
+        # `DictReader` silently skips blank lines and a quoted field may span several
+        # lines, so counting rows does not tell where their text is: ask the
+        # underlying reader how many physical lines it has consumed so far instead
+        start, consumed = consumed, reader.reader.line_num
+        while not lines[start].strip('\r\n'):
+            start += 1
+
+        # return the original text because it will be needed to append to the original file;
+        # the last line of a file may lack the newline that keeps merged records apart
+        line = ''.join(lines[start:consumed])
+        if not line.endswith('\n'):
+            line += '\n'
+
+        yield start + 1, row, line
+
+
 def read_readme_file(check_name):
     return read_file(get_readme_file(check_name))
 