From 40e0e1484f9aec3c0ad8e94d5b47dbdc1ec76628 Mon Sep 17 00:00:00 2001 From: Zachary Groves <32471391+ZStriker19@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:08:52 -0400 Subject: [PATCH] feat(onboarding): early exit conditions in lib-injection (#9323) This pull request adds "guardrails" to the "library injection" process. These are early exit conditions from the instrumentation process intended to avoid sending any traces when undefined behavior is likely. The code makes this determination on the basis of software versions present in the application environment, both of Python packages and the Python runtime itself. The biggest risk here is that instrumentation is disabled when it's not intended to be. I think existing tests in `tests/lib-injection` cover this pretty well. There's a new test added that verifies instrumentation was cancelled when an unsupported package version is present. Contains changes from https://github.com/DataDog/dd-trace-py/pull/9418 Related RFC: "[RFC] One Step Guardrails" - [x] minimum package version checks - [x] Testing - [x] replace envvars with inject_force - [x] figure out what to use instead of pkg_resources - [x] replace local file path with `DD_TELEMETRY_FORWARDER_PATH` - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified 
`@DataDog/apm-tees`. - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com> Co-authored-by: Emmett Butler (cherry picked from commit 0c38e09db92cceedc8affff4a217357fc2a1bcac) --- .github/workflows/lib-injection.yml | 53 +++- .gitlab/build-oci.sh | 1 + lib-injection/Dockerfile | 1 + lib-injection/copy-lib.sh | 1 + lib-injection/docker-compose.yml | 2 + lib-injection/min_compatible_versions.csv | 188 ++++++++++++ lib-injection/sitecustomize.py | 284 +++++++++++++++--- min_compatible_versions.csv | 188 ++++++++++++ ...injection-guardrails-bde1d57db91f33d1.yaml | 6 + scripts/min_compatible_versions.py | 80 +++++ tests/.suitespec.json | 2 + .../Dockerfile | 11 + .../django_app.py | 30 ++ .../Dockerfile | 10 + .../django_app.py | 30 ++ .../Dockerfile | 4 +- .../dd-lib-python-init-test-django/Dockerfile | 2 + 17 files changed, 851 insertions(+), 42 deletions(-) create mode 100644 lib-injection/min_compatible_versions.csv create mode 100644 min_compatible_versions.csv create mode 100644 releasenotes/notes/injection-guardrails-bde1d57db91f33d1.yaml create mode 100644 scripts/min_compatible_versions.py create mode 100644 tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/Dockerfile create mode 100644 
tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/django_app.py create mode 100644 tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/Dockerfile create mode 100644 tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/django_app.py diff --git a/.github/workflows/lib-injection.yml b/.github/workflows/lib-injection.yml index 893242f38a8..4cd8fb65432 100644 --- a/.github/workflows/lib-injection.yml +++ b/.github/workflows/lib-injection.yml @@ -42,7 +42,7 @@ jobs: repository: 'DataDog/system-tests' - name: Install runner - uses: ./.github/actions/install_runner + uses: ./.github/actions/install_runner - name: Run K8s Lib Injection Tests run: ./run.sh K8S_LIB_INJECTION_BASIC @@ -70,7 +70,7 @@ jobs: 'dd-lib-python-init-test-django-uvicorn', 'dd-lib-python-init-test-django-no-perms', 'dd-lib-python-init-test-django-pre-installed', - 'dd-lib-python-init-test-django-unsupported-python', + 'dd-lib-python-init-test-django-unsupported-package-force', ] fail-fast: false steps: @@ -116,3 +116,52 @@ jobs: if: success() || failure() run: | docker compose logs + + test_unit_no_instrumentation: + runs-on: ubuntu-latest + strategy: + matrix: + variant: [ + 'dd-lib-python-init-test-django-unsupported-python', + 'dd-lib-python-init-test-django-unsupported-package', + ] + fail-fast: false + steps: + - uses: actions/checkout@v4 + - name: Build and run the app + run: | + SRC="$(pwd)" + cd lib-injection + export DDTRACE_PYTHON_VERSION="v2.6.3" + export APP_CONTEXT="${SRC}/tests/lib-injection/${{matrix.variant}}" + export TEMP_DIR="${SRC}/tmp/ddtrace" + mkdir -p "${TEMP_DIR}" + # Give the temp dir permissions, by default the docker user doesn't have permissions + # to write to the filesystem. + chmod 777 $TEMP_DIR + # Start the lib_inject to get the files copied. This avoids a race condition with the startup of the + # application. 
+ docker compose up --build lib_inject + docker compose up --build -d + # Wait for the app to start + sleep 60 + docker compose logs + - name: Check Permissions on ddtrace pkgs + run: | + cd lib-injection + # Ensure /datadog-lib/ddtrace_pkgs is a valid directory that is not empty + docker compose run lib_inject find /datadog-init/ddtrace_pkgs -maxdepth 0 -empty | wc -l && if [ $? -ne 0 ]; then exit 1; fi + # Ensure files are not world writeable + docker compose run lib_inject find /datadog-init/ddtrace_pkgs ! -perm /o+w | wc -l && if [ $? -ne 0 ]; then exit 1; fi + # Ensure all users have read and execute permissions to files stored in /datadog-lib/ddtrace_pkgs + docker compose run lib_inject find /datadog-init/ddtrace_pkgs ! -perm u=rwx,o=rx | wc -l && if [ $? -ne 0 ]; then exit 1; fi + - name: Test the app + run: | + curl http://localhost:18080 + sleep 1 # wait for traces to be sent + - name: Print traces + run: curl http://localhost:8126/test/traces + - name: Check test agent received no trace + run: | + N=$(curl http://localhost:8126/test/traces | jq -r -e 'length') + [[ $N == "0" ]] diff --git a/.gitlab/build-oci.sh b/.gitlab/build-oci.sh index e68ea041434..5db2d459e45 100755 --- a/.gitlab/build-oci.sh +++ b/.gitlab/build-oci.sh @@ -41,6 +41,7 @@ fi echo -n $PYTHON_PACKAGE_VERSION > auto_inject-python.version cp ../lib-injection/sitecustomize.py $BUILD_DIR/ cp auto_inject-python.version $BUILD_DIR/version +cp ../min_compatible_versions.csv $BUILD_DIR/ chmod -R +r $BUILD_DIR chmod -R o-w $BUILD_DIR chmod -R g-w $BUILD_DIR diff --git a/lib-injection/Dockerfile b/lib-injection/Dockerfile index d15336e27a8..8e046a55896 100644 --- a/lib-injection/Dockerfile +++ b/lib-injection/Dockerfile @@ -33,5 +33,6 @@ RUN chown -R datadog:datadog /datadog-init/ddtrace_pkgs RUN chmod -R 755 /datadog-init/ddtrace_pkgs USER ${UID} WORKDIR /datadog-init +ADD min_compatible_versions.csv /datadog-init/min_compatible_versions.csv ADD sitecustomize.py /datadog-init/sitecustomize.py ADD 
copy-lib.sh /datadog-init/copy-lib.sh diff --git a/lib-injection/copy-lib.sh b/lib-injection/copy-lib.sh index 6465692635c..2a17b77d763 100755 --- a/lib-injection/copy-lib.sh +++ b/lib-injection/copy-lib.sh @@ -3,4 +3,5 @@ # This script is used by the admission controller to install the library from the # init container into the application container. cp sitecustomize.py "$1/sitecustomize.py" +cp min_compatible_versions.csv "$1/min_compatible_versions.csv" cp -r ddtrace_pkgs "$1/ddtrace_pkgs" diff --git a/lib-injection/docker-compose.yml b/lib-injection/docker-compose.yml index d7159d7c673..1586273555e 100644 --- a/lib-injection/docker-compose.yml +++ b/lib-injection/docker-compose.yml @@ -30,6 +30,7 @@ services: environment: - PYTHONPATH=/datadog-lib - DD_TRACE_AGENT_URL=http://testagent:8126 + - DD_TELEMETRY_FORWARDER_PATH= volumes: - ${TEMP_DIR:-/tmp/ddtrace_test}:/datadog-lib @@ -45,5 +46,6 @@ services: - PYTHONPATH=/datadog-lib - DD_TRACE_AGENT_URL=http://testagent:8126 - DD_TRACE_DEBUG=1 + - DD_TELEMETRY_FORWARDER_PATH= volumes: - ${TEMP_DIR:-/tmp/ddtrace_test}:/datadog-lib diff --git a/lib-injection/min_compatible_versions.csv b/lib-injection/min_compatible_versions.csv new file mode 100644 index 00000000000..770883f7e87 --- /dev/null +++ b/lib-injection/min_compatible_versions.csv @@ -0,0 +1,188 @@ +This file was generated by scripts/min_compatible_versions.py +pkg_name,min_version +Flask-Cache,~=0.13.1 +Jinja2,~=2.11.0 +SQLAlchemy,==2.0.22 +WebTest,0 +Werkzeug,<1.0 +ai21,0 +aiobotocore,~=1.4.2 +aiofiles,0 +aiohttp,~=3.7 +aiohttp_jinja2,~=1.5.0 +aiomysql,~=0.1.0 +aiopg,~=0.16.0 +aiosqlite,0 +algoliasearch,~=2.5 +anyio,>=3.4.0 +aredis,0 +asgiref,~=3.0 +astunparse,0 +async_generator,~=1.10 +asyncpg,~=0.22.0 +asynctest,==0.13.0 +attrs,>=20 +austin-python,~=1.0 +blinker,0 +boto3,0 +botocore,~=1.13 +bottle,>=0.12 +bytecode,0 +cassandra-driver,~=3.24.0 +cattrs,<23.1.1 +celery,~=4.4 +cfn-lint,~=0.53.1 +channels,~=3.0 +cherrypy,>=17 +click,==7.1.2 +cohere,==4.57 
+confluent-kafka,~=1.9.2 +coverage,0 +cryptography,<39 +daphne,0 +databases,0 +datadog-lambda,>=4.66.0 +ddsketch,>=3.0.0 +django,>=2.2 +django-pylibmc,>=0.6 +django-q,0 +django-redis,>=4.5 +django_hosts,~=4.0 +djangorestframework,>=3.11 +docker,0 +dogpile.cache,~=0.9 +dramatiq,0 +elasticsearch,~=7.13.0 +elasticsearch1,~=1.10.0 +elasticsearch2,~=2.5.0 +elasticsearch5,~=5.5.0 +elasticsearch6,~=6.8.0 +elasticsearch7,~=7.13.0 +elasticsearch7[async],0 +elasticsearch8,~=8.0.1 +elasticsearch[async],0 +envier,==0.5.1 +exceptiongroup,0 +falcon,~=3.0 +fastapi,~=0.64.0 +flask,~=0.12.0 +flask-caching,~=1.10.0 +flask-login,~=0.6.2 +gevent,~=20.12.0 +git+https://github.com/gnufede/pytest-memray.git@24a3c0735db99eedf57fb36c573680f9bab7cd73,0 +googleapis-common-protos,0 +graphene,~=3.0.0 +graphql-core,~=3.2.0 +graphql-relay,0 +greenlet,~=1.0.0 +grpcio,~=1.34.0 +gunicorn,==20.0.4 +gunicorn[gevent],0 +httpretty,<1.1 +httpx,~=0.17.0 +huggingface-hub,0 +hypothesis,<6.45.1 +importlib-metadata,0 +importlib_metadata,<5.0 +itsdangerous,<2.0 +jinja2,~=2.11.0 +kombu,>=4.2.0 +langchain,==0.0.192 +langchain-aws,0 +langchain-community,==0.0.14 +langchain-core,==0.1.52 +langchain-openai,==0.1.6 +langchain-pinecone,==0.1.0 +langchain_experimental,==0.0.47 +langsmith,==0.1.58 +logbook,~=1.0.0 +loguru,~=0.4.0 +mako,~=1.1.0 +mariadb,~=1.0.0 +markupsafe,<2.0 +mock,0 +molten,>=1.0 +mongoengine,~=0.23 +more_itertools,<8.11.0 +moto,>=1.0 +moto[all],<5.0 +msgpack,~=1.0.0 +mysql-connector-python,==8.0.5 +mysqlclient,~=2.0 +numexpr,0 +openai,==0.26.5 +openai[datalib],==1.30.1 +"openai[embeddings,datalib]",==0.27.2 +opensearch-py,0 +opensearch-py[async],0 +opensearch-py[requests],~=1.1.0 +opentelemetry-api,>=1 +opentelemetry-instrumentation-flask,<=0.37b0 +opentracing,>=2.0.0 +peewee,0 +pillow,0 +pinecone-client,==2.2.4 +pony,0 +protobuf,>=3 +psutil,0 +psycopg,~=3.0.18 +psycopg2-binary,~=2.8.0 +py-cpuinfo,~=8.0.0 +pycryptodome,0 +pyfakefs,0 +pylibmc,~=1.6.2 +pymemcache,~=3.4.2 +pymongo,~=3.11 
+pymysql,~=0.10 +pynamodb,~=5.0 +pyodbc,~=4.0.31 +pyramid,~=1.10 +pysqlite3-binary,0 +pytest,~=4.0 +pytest-aiohttp,0 +pytest-asyncio,==0.21.1 +pytest-bdd,>=4.0 +pytest-benchmark,>=3.1.0 +pytest-cov,==2.9.0 +pytest-django,==3.10.0 +pytest-mock,==2.0.0 +pytest-randomly,0 +pytest-sanic,~=1.6.2 +python-consul,>=1.1 +python-json-logger,==2.0.7 +python-memcached,0 +redis,~=2.0 +redis-py-cluster,>=2.0 +reno,0 +requests,~=2.20.0 +requests-mock,>=1.4 +responses,~=0.16.0 +rich,0 +rq,~=1.8.0 +ruamel.yaml,0 +sanic,~=20.12 +sanic-testing,~=0.8.3 +scikit-learn,==1.0.2 +simplejson,0 +six,==1.12.0 +snowflake-connector-python,~=2.3.0 +sqlalchemy,~=1.2.18 +starlette,~=0.14.0 +structlog,~=20.2.0 +tests/contrib/pyramid/pserve_app,0 +tiktoken,0 +tornado,~=4.5.0 +tortoise-orm,0 +typing-extensions,0 +typing_extensions,0 +urllib3,~=1.0 +uwsgi,0 +vcrpy,==4.2.1 +vertica-python,>=0.6.0 +websockets,<11.0 +webtest,0 +werkzeug,<1.0 +wheel,0 +xmltodict,>=0.12 +yaaredis,~=2.0.0 +yarl,~=1.0 diff --git a/lib-injection/sitecustomize.py b/lib-injection/sitecustomize.py index bbdf9124f3b..a449af39fa2 100644 --- a/lib-injection/sitecustomize.py +++ b/lib-injection/sitecustomize.py @@ -2,14 +2,122 @@ This module when included on the PYTHONPATH will update the PYTHONPATH to point to a directory containing the ddtrace package compatible with the current Python version and platform. 
""" + +from __future__ import print_function # noqa: E402 + +from collections import namedtuple +import csv +import json import os +import platform +import re +import subprocess import sys import time +from typing import Tuple + + +Version = namedtuple("Version", ["version", "constraint"]) + + +def parse_version(version: str) -> Tuple: + constraint_idx = re.search(r"\d", version).start() + numeric = version[constraint_idx:] + constraint = version[:constraint_idx] + parsed_version = tuple(int(re.sub("[^0-9]", "", p)) for p in numeric.split(".")) + return Version(parsed_version, constraint) + + +RUNTIMES_ALLOW_LIST = { + "cpython": {"min": parse_version("3.7"), "max": parse_version("3.13")}, +} + +FORCE_INJECT = os.environ.get("DD_INJECT_FORCE", "").lower() in ( + "true", + "1", + "t", +) +FORWARDER_EXECUTABLE = os.environ.get("DD_TELEMETRY_FORWARDER_PATH") +TELEMETRY_ENABLED = os.environ.get("DD_INJECTION_ENABLED") +DEBUG_MODE = os.environ.get("DD_TRACE_DEBUG", "").lower() in ("true", "1", "t") +INSTALLED_PACKAGES = None +PYTHON_VERSION = None +PYTHON_RUNTIME = None +PKGS_ALLOW_LIST = None +VERSION_COMPAT_FILE_LOCATIONS = ("../datadog-lib/min_compatible_versions.csv", "min_compatible_versions.csv") + + +def build_installed_pkgs(): + installed_packages = {} + if sys.version_info >= (3, 8): + from importlib import metadata as importlib_metadata + + installed_packages = {pkg.metadata["Name"]: pkg.version for pkg in importlib_metadata.distributions()} + else: + try: + import pkg_resources + + installed_packages = {pkg.key: pkg.version for pkg in pkg_resources.working_set} + except ImportError: + try: + import importlib_metadata + + installed_packages = {pkg.metadata["Name"]: pkg.version for pkg in importlib_metadata.distributions()} + except ImportError: + pass + return {key.lower(): value for key, value in installed_packages.items()} + + +def build_min_pkgs(): + min_pkgs = dict() + for location in VERSION_COMPAT_FILE_LOCATIONS: + if os.path.exists(location): + with 
open(location, "r") as csvfile: + csv_reader = csv.reader(csvfile, delimiter=",") + for idx, row in enumerate(csv_reader): + if idx < 2: + continue + min_pkgs[row[0].lower()] = parse_version(row[1]) + break + return min_pkgs -debug_mode = os.environ.get("DD_TRACE_DEBUG", "").lower() in ("true", "1", "t") -# Python versions that are supported by the current ddtrace release -installable_py_versions = ("3.7", "3.8", "3.9", "3.10", "3.11", "3.12") +def create_count_metric(metric, tags=None): + if tags is None: + tags = [] + return { + "name": metric, + "tags": tags, + } + + +def gen_telemetry_payload(telemetry_events): + return { + "metadata": { + "language_name": "python", + "language_version": PYTHON_VERSION, + "runtime_name": PYTHON_RUNTIME, + "runtime_version": PYTHON_VERSION, + "tracer_version": INSTALLED_PACKAGES.get("ddtrace", "unknown"), + "pid": os.getpid(), + }, + "points": telemetry_events, + } + + +def send_telemetry(event): + event_json = json.dumps(event) + if not FORWARDER_EXECUTABLE or not TELEMETRY_ENABLED: + return + p = subprocess.Popen( + [FORWARDER_EXECUTABLE, str(os.getpid())], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + p.stdin.write(event_json) + p.stdin.close() def _get_clib(): @@ -17,7 +125,6 @@ def _get_clib(): If GNU is not detected then returns MUSL. """ - import platform libc, version = platform.libc_ver() if libc == "glibc": @@ -25,42 +132,121 @@ def _get_clib(): return "musl" -def _log(msg, *args, level="info"): +def _log(msg, *args, **kwargs): """Log a message to stderr. This function is provided instead of built-in Python logging since we can't rely on any logger being configured. 
""" - if debug_mode: + level = kwargs.get("level", "info") + if DEBUG_MODE: asctime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) msg = "[%s] [%s] datadog.autoinstrumentation(pid: %d): " % (asctime, level.upper(), os.getpid()) + msg % args print(msg, file=sys.stderr) +def runtime_version_is_supported(python_runtime, python_version): + supported_versions = RUNTIMES_ALLOW_LIST.get(python_runtime, {}) + if not supported_versions: + return False + return ( + supported_versions["min"].version <= parse_version(python_version).version < supported_versions["max"].version + ) + + +def package_is_compatible(package_name, package_version): + installed_version = parse_version(package_version) + supported_version_spec = PKGS_ALLOW_LIST.get(package_name.lower(), Version((0,), "")) + if supported_version_spec.constraint in ("<", "<="): + return True # minimum "less than" means there is no minimum + return installed_version.version >= supported_version_spec.version + + def _inject(): + global INSTALLED_PACKAGES + global PYTHON_VERSION + global PYTHON_RUNTIME + global PKGS_ALLOW_LIST + INSTALLED_PACKAGES = build_installed_pkgs() + PYTHON_RUNTIME = platform.python_implementation().lower() + PYTHON_VERSION = platform.python_version() + PKGS_ALLOW_LIST = build_min_pkgs() + telemetry_data = [] + integration_incomp = False + runtime_incomp = False try: import ddtrace - except ModuleNotFoundError: + except ImportError: _log("user-installed ddtrace not found, configuring application to use injection site-packages") - platform = "manylinux2014" if _get_clib() == "gnu" else "musllinux_1_1" - _log("detected platform %s" % platform, level="debug") + current_platform = "manylinux2014" if _get_clib() == "gnu" else "musllinux_1_1" + _log("detected platform %s" % current_platform, level="debug") script_dir = os.path.dirname(__file__) pkgs_path = os.path.join(script_dir, "ddtrace_pkgs") _log("ddtrace_pkgs path is %r" % pkgs_path, level="debug") _log("ddtrace_pkgs contents: %r" % 
os.listdir(pkgs_path), level="debug") - python_version = ".".join(str(i) for i in sys.version_info[:2]) - if python_version not in installable_py_versions: + # check installed packages against allow list + incompatible_packages = {} + for package_name, package_version in INSTALLED_PACKAGES.items(): + if not package_is_compatible(package_name, package_version): + incompatible_packages[package_name] = package_version + + if incompatible_packages: + _log("Found incompatible packages: %s." % incompatible_packages, level="debug") + integration_incomp = True + if not FORCE_INJECT: + _log("Aborting dd-trace-py instrumentation.", level="debug") + + for key, value in incompatible_packages.items(): + telemetry_data.append( + create_count_metric( + "library_entrypoint.abort.integration", + [ + "integration:" + key, + "integration_version:" + value, + ], + ) + ) + + else: + _log( + "DD_INJECT_FORCE set to True, allowing unsupported integrations and continuing.", + level="debug", + ) + if not runtime_version_is_supported(PYTHON_RUNTIME, PYTHON_VERSION): _log( - f"This version of ddtrace does not support single step instrumentation with python {python_version} " - f"(supported versions: {installable_py_versions}), aborting", - level="error", + "Found incompatible runtime: %s %s. 
Supported runtimes: %s" + % (PYTHON_RUNTIME, PYTHON_VERSION, RUNTIMES_ALLOW_LIST), + level="debug", + ) + runtime_incomp = True + if not FORCE_INJECT: + _log("Aborting dd-trace-py instrumentation.", level="debug") + + telemetry_data.append(create_count_metric("library_entrypoint.abort.runtime")) + else: + _log( + "DD_INJECT_FORCE set to True, allowing unsupported runtimes and continuing.", + level="debug", + ) + if telemetry_data: + telemetry_data.append( + create_count_metric( + "library_entrypoint.abort", + [ + "reason:integration" if integration_incomp else "reason:incompatible_runtime", + ], + ) ) + telemetry_event = gen_telemetry_payload(telemetry_data) + send_telemetry(telemetry_event) return - site_pkgs_path = os.path.join(pkgs_path, "site-packages-ddtrace-py%s-%s" % (python_version, platform)) + site_pkgs_path = os.path.join( + pkgs_path, "site-packages-ddtrace-py%s-%s" % (".".join(PYTHON_VERSION.split(".")[:2]), current_platform) + ) _log("site-packages path is %r" % site_pkgs_path, level="debug") if not os.path.exists(site_pkgs_path): _log("ddtrace site-packages not found in %r, aborting" % site_pkgs_path, level="error") @@ -69,7 +255,6 @@ def _inject(): # Add the custom site-packages directory to the Python path to load the ddtrace package. sys.path.insert(0, site_pkgs_path) _log("sys.path %s" % sys.path, level="debug") - try: import ddtrace # noqa: F401 @@ -79,29 +264,52 @@ def _inject(): else: # In injected environments, the profiler needs to know that it is only allowed to use the native exporter os.environ["DD_PROFILING_EXPORT_LIBDD_REQUIRED"] = "true" - # This import has the same effect as ddtrace-run for the current process (auto-instrument all libraries). - import ddtrace.bootstrap.sitecustomize - - # Modify the PYTHONPATH for any subprocesses that might be spawned: - # - Remove the PYTHONPATH entry used to bootstrap this installation as it's no longer necessary - # now that the package is installed. 
- # - Add the custom site-packages directory to PYTHONPATH to ensure the ddtrace package can be loaded - # - Add the ddtrace bootstrap dir to the PYTHONPATH to achieve the same effect as ddtrace-run. - python_path = os.getenv("PYTHONPATH", "").split(os.pathsep) - if script_dir in python_path: - python_path.remove(script_dir) - python_path.insert(0, site_pkgs_path) - bootstrap_dir = os.path.abspath(os.path.dirname(ddtrace.bootstrap.sitecustomize.__file__)) - python_path.insert(0, bootstrap_dir) - python_path = os.pathsep.join(python_path) - os.environ["PYTHONPATH"] = python_path - - # Also insert the bootstrap dir in the path of the current python process. - sys.path.insert(0, bootstrap_dir) - _log("successfully configured ddtrace package, python path is %r" % os.environ["PYTHONPATH"]) + try: + import ddtrace.bootstrap.sitecustomize + + # Modify the PYTHONPATH for any subprocesses that might be spawned: + # - Remove the PYTHONPATH entry used to bootstrap this installation as it's no longer necessary + # now that the package is installed. + # - Add the custom site-packages directory to PYTHONPATH to ensure the ddtrace package can be loaded + # - Add the ddtrace bootstrap dir to the PYTHONPATH to achieve the same effect as ddtrace-run. + python_path = os.getenv("PYTHONPATH", "").split(os.pathsep) + if script_dir in python_path: + python_path.remove(script_dir) + python_path.insert(0, site_pkgs_path) + bootstrap_dir = os.path.abspath(os.path.dirname(ddtrace.bootstrap.sitecustomize.__file__)) + python_path.insert(0, bootstrap_dir) + python_path = os.pathsep.join(python_path) + os.environ["PYTHONPATH"] = python_path + + # Also insert the bootstrap dir in the path of the current python process. 
+ sys.path.insert(0, bootstrap_dir) + _log("successfully configured ddtrace package, python path is %r" % os.environ["PYTHONPATH"]) + event = gen_telemetry_payload( + [ + create_count_metric( + "library_entrypoint.complete", + [ + "injection_forced:" + str(runtime_incomp or integration_incomp).lower(), + ], + ) + ] + ) + send_telemetry(event) + except Exception as e: + event = gen_telemetry_payload( + [create_count_metric("library_entrypoint.error", ["error:" + type(e).__name__.lower()])] + ) + send_telemetry(event) + _log("failed to load ddtrace.bootstrap.sitecustomize: %s" % e, level="error") + return else: - _log(f"user-installed ddtrace found: {ddtrace.__version__}, aborting site-packages injection", level="warning") + _log( + "user-installed ddtrace found: %s, aborting site-packages injection" % ddtrace.__version__, level="warning" + ) -_inject() +try: + _inject() +except Exception: + pass # absolutely never allow exceptions to propagate to the app diff --git a/min_compatible_versions.csv b/min_compatible_versions.csv new file mode 100644 index 00000000000..770883f7e87 --- /dev/null +++ b/min_compatible_versions.csv @@ -0,0 +1,188 @@ +This file was generated by scripts/min_compatible_versions.py +pkg_name,min_version +Flask-Cache,~=0.13.1 +Jinja2,~=2.11.0 +SQLAlchemy,==2.0.22 +WebTest,0 +Werkzeug,<1.0 +ai21,0 +aiobotocore,~=1.4.2 +aiofiles,0 +aiohttp,~=3.7 +aiohttp_jinja2,~=1.5.0 +aiomysql,~=0.1.0 +aiopg,~=0.16.0 +aiosqlite,0 +algoliasearch,~=2.5 +anyio,>=3.4.0 +aredis,0 +asgiref,~=3.0 +astunparse,0 +async_generator,~=1.10 +asyncpg,~=0.22.0 +asynctest,==0.13.0 +attrs,>=20 +austin-python,~=1.0 +blinker,0 +boto3,0 +botocore,~=1.13 +bottle,>=0.12 +bytecode,0 +cassandra-driver,~=3.24.0 +cattrs,<23.1.1 +celery,~=4.4 +cfn-lint,~=0.53.1 +channels,~=3.0 +cherrypy,>=17 +click,==7.1.2 +cohere,==4.57 +confluent-kafka,~=1.9.2 +coverage,0 +cryptography,<39 +daphne,0 +databases,0 +datadog-lambda,>=4.66.0 +ddsketch,>=3.0.0 +django,>=2.2 +django-pylibmc,>=0.6 +django-q,0 
+django-redis,>=4.5 +django_hosts,~=4.0 +djangorestframework,>=3.11 +docker,0 +dogpile.cache,~=0.9 +dramatiq,0 +elasticsearch,~=7.13.0 +elasticsearch1,~=1.10.0 +elasticsearch2,~=2.5.0 +elasticsearch5,~=5.5.0 +elasticsearch6,~=6.8.0 +elasticsearch7,~=7.13.0 +elasticsearch7[async],0 +elasticsearch8,~=8.0.1 +elasticsearch[async],0 +envier,==0.5.1 +exceptiongroup,0 +falcon,~=3.0 +fastapi,~=0.64.0 +flask,~=0.12.0 +flask-caching,~=1.10.0 +flask-login,~=0.6.2 +gevent,~=20.12.0 +git+https://github.com/gnufede/pytest-memray.git@24a3c0735db99eedf57fb36c573680f9bab7cd73,0 +googleapis-common-protos,0 +graphene,~=3.0.0 +graphql-core,~=3.2.0 +graphql-relay,0 +greenlet,~=1.0.0 +grpcio,~=1.34.0 +gunicorn,==20.0.4 +gunicorn[gevent],0 +httpretty,<1.1 +httpx,~=0.17.0 +huggingface-hub,0 +hypothesis,<6.45.1 +importlib-metadata,0 +importlib_metadata,<5.0 +itsdangerous,<2.0 +jinja2,~=2.11.0 +kombu,>=4.2.0 +langchain,==0.0.192 +langchain-aws,0 +langchain-community,==0.0.14 +langchain-core,==0.1.52 +langchain-openai,==0.1.6 +langchain-pinecone,==0.1.0 +langchain_experimental,==0.0.47 +langsmith,==0.1.58 +logbook,~=1.0.0 +loguru,~=0.4.0 +mako,~=1.1.0 +mariadb,~=1.0.0 +markupsafe,<2.0 +mock,0 +molten,>=1.0 +mongoengine,~=0.23 +more_itertools,<8.11.0 +moto,>=1.0 +moto[all],<5.0 +msgpack,~=1.0.0 +mysql-connector-python,==8.0.5 +mysqlclient,~=2.0 +numexpr,0 +openai,==0.26.5 +openai[datalib],==1.30.1 +"openai[embeddings,datalib]",==0.27.2 +opensearch-py,0 +opensearch-py[async],0 +opensearch-py[requests],~=1.1.0 +opentelemetry-api,>=1 +opentelemetry-instrumentation-flask,<=0.37b0 +opentracing,>=2.0.0 +peewee,0 +pillow,0 +pinecone-client,==2.2.4 +pony,0 +protobuf,>=3 +psutil,0 +psycopg,~=3.0.18 +psycopg2-binary,~=2.8.0 +py-cpuinfo,~=8.0.0 +pycryptodome,0 +pyfakefs,0 +pylibmc,~=1.6.2 +pymemcache,~=3.4.2 +pymongo,~=3.11 +pymysql,~=0.10 +pynamodb,~=5.0 +pyodbc,~=4.0.31 +pyramid,~=1.10 +pysqlite3-binary,0 +pytest,~=4.0 +pytest-aiohttp,0 +pytest-asyncio,==0.21.1 +pytest-bdd,>=4.0 
+pytest-benchmark,>=3.1.0 +pytest-cov,==2.9.0 +pytest-django,==3.10.0 +pytest-mock,==2.0.0 +pytest-randomly,0 +pytest-sanic,~=1.6.2 +python-consul,>=1.1 +python-json-logger,==2.0.7 +python-memcached,0 +redis,~=2.0 +redis-py-cluster,>=2.0 +reno,0 +requests,~=2.20.0 +requests-mock,>=1.4 +responses,~=0.16.0 +rich,0 +rq,~=1.8.0 +ruamel.yaml,0 +sanic,~=20.12 +sanic-testing,~=0.8.3 +scikit-learn,==1.0.2 +simplejson,0 +six,==1.12.0 +snowflake-connector-python,~=2.3.0 +sqlalchemy,~=1.2.18 +starlette,~=0.14.0 +structlog,~=20.2.0 +tests/contrib/pyramid/pserve_app,0 +tiktoken,0 +tornado,~=4.5.0 +tortoise-orm,0 +typing-extensions,0 +typing_extensions,0 +urllib3,~=1.0 +uwsgi,0 +vcrpy,==4.2.1 +vertica-python,>=0.6.0 +websockets,<11.0 +webtest,0 +werkzeug,<1.0 +wheel,0 +xmltodict,>=0.12 +yaaredis,~=2.0.0 +yarl,~=1.0 diff --git a/releasenotes/notes/injection-guardrails-bde1d57db91f33d1.yaml b/releasenotes/notes/injection-guardrails-bde1d57db91f33d1.yaml new file mode 100644 index 00000000000..d2537794dc7 --- /dev/null +++ b/releasenotes/notes/injection-guardrails-bde1d57db91f33d1.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + SSI: Introduces generic safeguards for automatic instrumentation when using single step install in the form of early exit conditions. + Early exit from instrumentation is triggered if a version of software in the environment is not explicitly supported by ddtrace. The Python runtime + itself and many Python packages are checked for explicit support on the basis of their version. 
diff --git a/scripts/min_compatible_versions.py b/scripts/min_compatible_versions.py
new file mode 100644
index 00000000000..c32ad5351ea
--- /dev/null
+++ b/scripts/min_compatible_versions.py
@@ -0,0 +1,91 @@
+import csv
+import pathlib
+import sys
+from typing import Dict
+from typing import List
+from typing import Set
+
+from packaging.version import parse as parse_version
+
+
+sys.path.append(str(pathlib.Path(__file__).parent.parent.resolve()))
+import riotfile  # noqa:E402
+
+
+OUT_FILENAME = "min_compatible_versions.csv"
+OUT_DIRECTORIES = (".", "lib-injection")
+IGNORED_PACKAGES = {"setuptools"}
+
+
+def _format_version_specifiers(spec: Set[str]) -> Set[str]:
+    """Split comma-joined specifiers into single clauses, dropping empty specs and ``!=`` exclusions."""
+    return {part for v in spec if v for part in v.split(",") if "!=" not in part}
+
+
+def tree_pkgs_from_riot() -> Dict[str, Set[str]]:
+    """Return every package in the riotfile mapped to the version specifiers it is tested with."""
+    return _tree_pkgs_from_riot(riotfile.venv)
+
+
+def _tree_pkgs_from_riot(node: riotfile.Venv) -> Dict[str, Set]:
+    """Recursively merge the package specifiers of ``node`` and all of its child venvs."""
+    result = {
+        pkg: _format_version_specifiers(set(versions))
+        for pkg, versions in node.pkgs.items()
+        if pkg not in IGNORED_PACKAGES
+    }
+    for child_venv in node.venvs:
+        child_pkgs = _tree_pkgs_from_riot(child_venv)
+        for pkg_name, versions in child_pkgs.items():
+            if pkg_name in IGNORED_PACKAGES:
+                continue
+            if pkg_name in result:
+                result[pkg_name] = result[pkg_name].union(versions)
+            else:
+                result[pkg_name] = versions
+    return result
+
+
+def min_version_spec(version_specs: List[str]) -> str:
+    """Return the specifier from ``version_specs`` with the lowest version number.
+
+    Comparison operators are stripped before parsing; on a tie the first specifier seen wins.
+    Returns an empty string when ``version_specs`` is empty.
+    """
+    min_numeric = None
+    min_spec = ""
+    for spec in version_specs:
+        numeric = parse_version(spec.strip("~==<>"))
+        if min_numeric is None or numeric < min_numeric:
+            min_numeric = numeric
+            min_spec = spec
+    return min_spec
+
+
+def write_out(all_pkgs: Dict[str, Set[str]], outfile: str) -> None:
+    """Write one ``pkg_name,min_version`` row per package to ``outfile`` and echo the result to stdout."""
+    # newline="" lets the csv module control line endings, as its documentation requires.
+    with open(outfile, "w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile, delimiter=",")
+        csv_writer.writerow(["This file was generated by scripts/min_compatible_versions.py"])
+        csv_writer.writerow(["pkg_name", "min_version"])
+        for pkg, versions in sorted(all_pkgs.items()):
+            # Sort once so the printed list and the tie-breaking in min_version_spec are deterministic.
+            tested = sorted(versions)
+            min_version = min_version_spec(tested).strip() if tested else "0"
+            print("%s\n\tTested versions: %s\n\tMinimum: %s" % (pkg, tested, min_version))
+            csv_writer.writerow([pkg, min_version])
+
+
+def main():
+    """Discover the minimum version of every package referenced in the riotfile
+
+    Writes to stdout and to min_compatible_versions.csv in each of OUT_DIRECTORIES
+    """
+    pkgs = tree_pkgs_from_riot()
+    for directory in OUT_DIRECTORIES:
+        write_out(pkgs, pathlib.Path(directory) / OUT_FILENAME)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/.suitespec.json b/tests/.suitespec.json
index 691b32c8219..655561566ca 100644
--- a/tests/.suitespec.json
+++ b/tests/.suitespec.json
@@ -30,6 +30,8 @@
             "tests/lib-injection/dd-lib-python-init-test-django-no-perms/*",
             "tests/lib-injection/dd-lib-python-init-test-django-pre-installed/*",
             "tests/lib-injection/dd-lib-python-init-test-django-unsupported-python/*",
+            "tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/*",
+            "tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/*",
             "tests/lib-injection/dd-lib-python-init-test-django-uvicorn/*"
         ],
         "core": [
diff --git a/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/Dockerfile b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/Dockerfile
new file mode 100644
index 00000000000..05969d26eb7
--- /dev/null
+++ b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.11
+
+ENV PYTHONUNBUFFERED 1
+ENV DD_INJECT_FORCE 1
+ENV DJANGO_SETTINGS_MODULE django_app
+WORKDIR /src
+ADD . /src
+EXPOSE 18080
+RUN pip install django==4.1.3 structlog==16.0.0
+
+CMD python -m django runserver 0.0.0.0:18080
diff --git a/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/django_app.py b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/django_app.py
new file mode 100644
index 00000000000..b73bf3b8782
--- /dev/null
+++ b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package-force/django_app.py
@@ -0,0 +1,30 @@
+import os
+
+from django.http import HttpResponse
+from django.urls import path
+
+
+filepath, extension = os.path.splitext(__file__)
+ROOT_URLCONF = os.path.basename(filepath)
+DEBUG = False
+SECRET_KEY = "fdsfdasfa"
+ALLOWED_HOSTS = ["*"]
+
+
+def index(request):
+    import ddtrace
+
+    if ddtrace.__version__ != "2.6.3":
+        print(
+            "Assertion failure: unexpected ddtrace version received. Got %r when expecting '2.6.3'"
+            % ddtrace.__version__
+        )
+        # Hard exit so traces aren't flushed and the test will fail.
+        os._exit(1)
+
+    return HttpResponse("test")
+
+
+urlpatterns = [
+    path("", index),
+]
diff --git a/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/Dockerfile b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/Dockerfile
new file mode 100644
index 00000000000..31f3634d67a
--- /dev/null
+++ b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.7
+
+ENV PYTHONUNBUFFERED 1
+ENV DJANGO_SETTINGS_MODULE django_app
+WORKDIR /src
+ADD . /src
+EXPOSE 18080
+RUN pip install django==3.2 falcon==2.0.0
+
+CMD python -m django runserver 0.0.0.0:18080
diff --git a/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/django_app.py b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/django_app.py
new file mode 100644
index 00000000000..783bd0bbd19
--- /dev/null
+++ b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-package/django_app.py
@@ -0,0 +1,30 @@
+import os
+
+from django.http import HttpResponse
+from django.urls import path
+
+
+filepath, extension = os.path.splitext(__file__)
+ROOT_URLCONF = os.path.basename(filepath)
+DEBUG = False
+SECRET_KEY = "fdsfdasfa"
+ALLOWED_HOSTS = ["*"]
+
+
+def index(request):
+    import ddtrace
+
+    if ddtrace.__version__ != "1.12.0":
+        print(
+            "Assertion failure: unexpected ddtrace version received. Got %r when expecting '1.12.0'"
+            % ddtrace.__version__
+        )
+        # Hard exit so traces aren't flushed and the test will fail.
+        os._exit(1)
+
+    return HttpResponse("test")
+
+
+urlpatterns = [
+    path("", index),
+]
diff --git a/tests/lib-injection/dd-lib-python-init-test-django-unsupported-python/Dockerfile b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-python/Dockerfile
index cfc072eb335..1212b633e37 100644
--- a/tests/lib-injection/dd-lib-python-init-test-django-unsupported-python/Dockerfile
+++ b/tests/lib-injection/dd-lib-python-init-test-django-unsupported-python/Dockerfile
@@ -5,6 +5,6 @@ ENV DJANGO_SETTINGS_MODULE django_app
 WORKDIR /src
 ADD . /src
 EXPOSE 18080
-RUN pip install django==3.2 ddtrace==1.12.0
+RUN pip install django==3.2
 
-CMD ddtrace-run python -m django runserver 0.0.0.0:18080
+CMD python -m django runserver 0.0.0.0:18080
diff --git a/tests/lib-injection/dd-lib-python-init-test-django/Dockerfile b/tests/lib-injection/dd-lib-python-init-test-django/Dockerfile
index a7aa0858a6e..8750acd8ddd 100644
--- a/tests/lib-injection/dd-lib-python-init-test-django/Dockerfile
+++ b/tests/lib-injection/dd-lib-python-init-test-django/Dockerfile
@@ -1,6 +1,8 @@
 FROM python:3.11
 
 ENV PYTHONUNBUFFERED 1
+# intentionally redundant in this test
+ENV DD_INJECT_FORCE 1
 ENV DJANGO_SETTINGS_MODULE django_app
 WORKDIR /src
 ADD . /src