diff --git a/.flake8 b/.flake8 index a0924f947d..8fc8507d92 100644 --- a/.flake8 +++ b/.flake8 @@ -3,6 +3,7 @@ ignore = E501 # line too long, defer to black F401 # unused import, defer to pylint W503 # allow line breaks after binary ops, not after + E203 # allow whitespace before ':' (https://github.com/psf/black#slices) exclude = .bzr .git diff --git a/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py b/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py index eedc8d5998..477d237af0 100644 --- a/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py +++ b/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py @@ -62,18 +62,21 @@ def _before_flask_request(): tracer = trace.tracer() + attributes = otel_wsgi.collect_request_attributes(environ) + if flask_request.url_rule: + # For 404 that result from no route found, etc, we don't have a url_rule. + attributes["http.route"] = flask_request.url_rule.rule span = tracer.create_span( - span_name, parent_span, kind=trace.SpanKind.SERVER + span_name, + parent_span, + kind=trace.SpanKind.SERVER, + attributes=attributes, ) span.start(environ.get(_ENVIRON_STARTTIME_KEY)) activation = tracer.use_span(span, end_on_exit=True) activation.__enter__() environ[_ENVIRON_ACTIVATION_KEY] = activation environ[_ENVIRON_SPAN_KEY] = span - otel_wsgi.add_request_attributes(span, environ) - if flask_request.url_rule: - # For 404 that result from no route found, etc, we don't have a url_rule. - span.set_attribute("http.route", flask_request.url_rule.rule) def _teardown_flask_request(exc): diff --git a/ext/opentelemetry-ext-flask/tests/test_flask_integration.py b/ext/opentelemetry-ext-flask/tests/test_flask_integration.py index dfb9dee885..353778c380 100644 --- a/ext/opentelemetry-ext-flask/tests/test_flask_integration.py +++ b/ext/opentelemetry-ext-flask/tests/test_flask_integration.py @@ -56,6 +56,17 @@ def test_simple(self): "hello_endpoint", trace_api.INVALID_SPAN_CONTEXT, kind=trace_api.SpanKind.SERVER, + attributes={ + "component": "http", + "http.method": "GET", + "http.server_name": "localhost", + "http.scheme": "http", + "host.port": 80, + "http.host": "localhost", + "http.target": "/hello/123", + "http.flavor": "1.1", + "http.route": "/hello/", + }, ) self.assertEqual(1, self.span.start.call_count) @@ -63,15 +74,7 @@ def test_simple(self): self.assertEqual( self.span_attrs, - { - "component": "http", - "http.method": "GET", - "http.host": "localhost", - "http.url": "http://localhost/hello/123", - "http.route": "/hello/", - "http.status_code": 200, - "http.status_text": "OK", - }, + {"http.status_code": 200, "http.status_text": "OK"}, ) def test_404(self): @@ -83,6 +86,16 @@ def test_404(self): "/bye", trace_api.INVALID_SPAN_CONTEXT, kind=trace_api.SpanKind.SERVER, + attributes={ + "component": "http", + "http.method": "POST", + "http.server_name": "localhost", + "http.scheme": "http", + "host.port": 80, + "http.host": "localhost", + "http.target": "/bye", + "http.flavor": "1.1", + }, ) self.assertEqual(1, self.span.start.call_count) @@ -92,14 +105,7 @@ def test_404(self): self.assertEqual( self.span_attrs, - { - "component": "http", - "http.method": "POST", - "http.host": "localhost", - "http.url": "http://localhost/bye", - "http.status_code": 404, - "http.status_text": "NOT FOUND", - }, + {"http.status_code": 404, "http.status_text": "NOT FOUND"}, ) def test_internal_error(self): @@ -111,6 +117,17 @@ def test_internal_error(self): "hello_endpoint", trace_api.INVALID_SPAN_CONTEXT, kind=trace_api.SpanKind.SERVER, + attributes={ + "component": "http", + "http.method": "GET", + "http.server_name": "localhost", + "http.scheme": "http", + "host.port": 80, + "http.host": "localhost", + "http.target": "/hello/500", + "http.flavor": "1.1", + "http.route": "/hello/", + }, ) self.assertEqual(1, self.span.start.call_count) @@ -121,11 +138,6 @@ def test_internal_error(self): self.assertEqual( self.span_attrs, { - "component": "http", - "http.method": "GET", - "http.host": "localhost", - "http.url": "http://localhost/hello/500", - "http.route": "/hello/", "http.status_code": 500, "http.status_text": "INTERNAL SERVER ERROR", }, diff --git a/ext/opentelemetry-ext-wsgi/src/opentelemetry/ext/wsgi/__init__.py b/ext/opentelemetry-ext-wsgi/src/opentelemetry/ext/wsgi/__init__.py index a2cf163342..327ca95678 100644 --- a/ext/opentelemetry-ext-wsgi/src/opentelemetry/ext/wsgi/__init__.py +++ b/ext/opentelemetry-ext-wsgi/src/opentelemetry/ext/wsgi/__init__.py @@ -21,11 +21,15 @@ import functools import typing import wsgiref.util as wsgiref_util +from http import HTTPStatus from opentelemetry import propagators, trace from opentelemetry.ext.wsgi.version import __version__ # noqa +from opentelemetry.trace.status import Status, StatusCanonicalCode from opentelemetry.util import time_ns +_HTTP_VERSION_PREFIX = "HTTP/" + def get_header_from_environ( environ: dict, header_name: str @@ -42,44 +46,80 @@ def get_header_from_environ( return [] -def add_request_attributes(span, environ): - """Adds HTTP request attributes from the PEP3333-conforming WSGI environ to span.""" - - span.set_attribute("component", "http") - span.set_attribute("http.method", environ["REQUEST_METHOD"]) - - host = environ.get("HTTP_HOST") - if not host: - host = environ["SERVER_NAME"] - port = environ["SERVER_PORT"] - scheme = environ["wsgi.url_scheme"] - if ( - scheme == "http" - and port != "80" - or scheme == "https" - and port != "443" - ): - host += ":" + port - - # NOTE: Nonstandard (but see - # https://github.com/open-telemetry/opentelemetry-specification/pull/263) - span.set_attribute("http.host", host) - - url = environ.get("REQUEST_URI") or environ.get("RAW_URI") - - if url: - if url[0] == "/": - # We assume that no scheme-relative URLs will be in url here. - # After all, if a request is made to http://myserver//foo, we may get - # //foo which looks like scheme-relative but isn't. - url = environ["wsgi.url_scheme"] + "://" + host + url - elif not url.startswith(environ["wsgi.url_scheme"] + ":"): - # Something fishy is in RAW_URL. Let's fall back to request_uri() - url = wsgiref_util.request_uri(environ) +def setifnotnone(dic, key, value): + if value is not None: + dic[key] = value + + +def http_status_to_canonical_code(code: int, allow_redirect: bool = True): + # pylint:disable=too-many-branches,too-many-return-statements + if code < 100: + return StatusCanonicalCode.UNKNOWN + if code <= 299: + return StatusCanonicalCode.OK + if code <= 399: + if allow_redirect: + return StatusCanonicalCode.OK + return StatusCanonicalCode.DEADLINE_EXCEEDED + if code <= 499: + if code == HTTPStatus.UNAUTHORIZED: + return StatusCanonicalCode.UNAUTHENTICATED + if code == HTTPStatus.FORBIDDEN: + return StatusCanonicalCode.PERMISSION_DENIED + if code == HTTPStatus.NOT_FOUND: + return StatusCanonicalCode.NOT_FOUND + if code == HTTPStatus.TOO_MANY_REQUESTS: + return StatusCanonicalCode.RESOURCE_EXHAUSTED + return StatusCanonicalCode.INVALID_ARGUMENT + if code <= 599: + if code == HTTPStatus.NOT_IMPLEMENTED: + return StatusCanonicalCode.UNIMPLEMENTED + if code == HTTPStatus.SERVICE_UNAVAILABLE: + return StatusCanonicalCode.UNAVAILABLE + if code == HTTPStatus.GATEWAY_TIMEOUT: + return StatusCanonicalCode.DEADLINE_EXCEEDED + return StatusCanonicalCode.INTERNAL + return StatusCanonicalCode.UNKNOWN + + +def collect_request_attributes(environ): + """Collects HTTP request attributes from the PEP3333-conforming + WSGI environ and returns a dictionary to be used as span creation attributes.""" + + result = { + "component": "http", + "http.method": environ["REQUEST_METHOD"], + "http.server_name": environ["SERVER_NAME"], + "http.scheme": environ["wsgi.url_scheme"], + "host.port": int(environ["SERVER_PORT"]), + } + + setifnotnone(result, "http.host", environ.get("HTTP_HOST")) + target = environ.get("RAW_URI") + if target is None: # Note: `"" or None is None` + target = environ.get("REQUEST_URI") + if target is not None: + result["http.target"] = target else: - url = wsgiref_util.request_uri(environ) + result["http.url"] = wsgiref_util.request_uri(environ) + + remote_addr = environ.get("REMOTE_ADDR") + if remote_addr: + result[ + "peer.ipv6" if ":" in remote_addr else "peer.ipv4" + ] = remote_addr + remote_host = environ.get("REMOTE_HOST") + if remote_host and remote_host != remote_addr: + result["peer.hostname"] = remote_host - span.set_attribute("http.url", url) + setifnotnone(result, "peer.port", environ.get("REMOTE_PORT")) + flavor = environ.get("SERVER_PROTOCOL", "") + if flavor.upper().startswith(_HTTP_VERSION_PREFIX): + flavor = flavor[len(_HTTP_VERSION_PREFIX) :] + if flavor: + result["http.flavor"] = flavor + + return result def add_response_attributes( @@ -94,9 +134,15 @@ def add_response_attributes( try: status_code = int(status_code) except ValueError: - pass + span.set_status( + Status( + StatusCanonicalCode.UNKNOWN, + "Non-integer HTTP status: " + repr(status_code), + ) + ) else: span.set_attribute("http.status_code", status_code) + span.set_status(Status(http_status_to_canonical_code(status_code))) def get_default_span_name(environ): @@ -145,13 +191,15 @@ def __call__(self, environ, start_response): span_name = get_default_span_name(environ) span = tracer.create_span( - span_name, parent_span, kind=trace.SpanKind.SERVER + span_name, + parent_span, + kind=trace.SpanKind.SERVER, + attributes=collect_request_attributes(environ), ) span.start(start_timestamp) try: with tracer.use_span(span): - add_request_attributes(span, environ) start_response = self._create_start_response( span, start_response ) @@ -159,6 +207,7 @@ def __call__(self, environ, start_response): iterable = self.wsgi(environ, start_response) return _end_span_after_iterating(iterable, span, tracer) except: # noqa + # TODO Set span status (cf. https://github.com/open-telemetry/opentelemetry-python/issues/292) span.end() raise diff --git a/ext/opentelemetry-ext-wsgi/tests/test_wsgi_middleware.py b/ext/opentelemetry-ext-wsgi/tests/test_wsgi_middleware.py index 9c77acbb75..c46fbb2ae3 100644 --- a/ext/opentelemetry-ext-wsgi/tests/test_wsgi_middleware.py +++ b/ext/opentelemetry-ext-wsgi/tests/test_wsgi_middleware.py @@ -16,7 +16,7 @@ import unittest import unittest.mock as mock import wsgiref.util as wsgiref_util -from urllib.parse import urlparse +from urllib.parse import urlsplit import opentelemetry.ext.wsgi as otel_wsgi from opentelemetry import trace as trace_api @@ -97,7 +97,19 @@ def validate_response(self, response, error=None): # Verify that start_span has been called self.create_span.assert_called_with( - "/", trace_api.INVALID_SPAN_CONTEXT, kind=trace_api.SpanKind.SERVER + "/", + trace_api.INVALID_SPAN_CONTEXT, + kind=trace_api.SpanKind.SERVER, + attributes={ + "component": "http", + "http.method": "GET", + "http.server_name": "127.0.0.1", + "http.scheme": "http", + "host.port": 80, + "http.host": "127.0.0.1", + "http.flavor": "1.0", + "http.url": "http://127.0.0.1/", + }, ) self.assertEqual(1, self.span.start.call_count) @@ -145,31 +157,43 @@ def setUp(self): def test_request_attributes(self): self.environ["QUERY_STRING"] = "foo=bar" - otel_wsgi.add_request_attributes(self.span, self.environ) - - expected = ( - mock.call("component", "http"), - mock.call("http.method", "GET"), - mock.call("http.host", "127.0.0.1"), - mock.call("http.url", "http://127.0.0.1/?foo=bar"), + attrs = otel_wsgi.collect_request_attributes(self.environ) + self.assertDictEqual( + attrs, + { + "component": "http", + "http.method": "GET", + "http.host": "127.0.0.1", + "http.url": "http://127.0.0.1/?foo=bar", + "host.port": 80, + "http.scheme": "http", + "http.server_name": "127.0.0.1", + "http.flavor": "1.0", + }, ) - self.assertEqual(self.span.set_attribute.call_count, len(expected)) - self.span.set_attribute.assert_has_calls(expected, any_order=True) - def validate_url(self, expected_url): - otel_wsgi.add_request_attributes(self.span, self.environ) - attrs = { - args[0][0]: args[0][1] - for args in self.span.set_attribute.call_args_list + def validate_url(self, expected_url, raw=False, has_host=True): + parts = urlsplit(expected_url) + expected = { + "http.scheme": parts.scheme, + "host.port": parts.port or (80 if parts.scheme == "http" else 443), + "http.server_name": parts.hostname, # Not true in the general case, but for all tests. } - self.assertIn("http.url", attrs) - self.assertEqual(attrs["http.url"], expected_url) - self.assertIn("http.host", attrs) - self.assertEqual(attrs["http.host"], urlparse(expected_url).netloc) + if raw: + expected["http.target"] = expected_url.split(parts.netloc, 1)[1] + else: + expected["http.url"] = expected_url + if has_host: + expected["http.host"] = parts.hostname + + attrs = otel_wsgi.collect_request_attributes(self.environ) + self.assertGreaterEqual( + attrs.items(), expected.items(), expected_url + " expected." + ) def test_request_attributes_with_partial_raw_uri(self): self.environ["RAW_URI"] = "/#top" - self.validate_url("http://127.0.0.1/#top") + self.validate_url("http://127.0.0.1/#top", raw=True) def test_request_attributes_with_partial_raw_uri_and_nonstandard_port( self, @@ -177,58 +201,80 @@ def test_request_attributes_with_partial_raw_uri_and_nonstandard_port( self.environ["RAW_URI"] = "/?" del self.environ["HTTP_HOST"] self.environ["SERVER_PORT"] = "8080" - self.validate_url("http://127.0.0.1:8080/?") + self.validate_url("http://127.0.0.1:8080/?", raw=True, has_host=False) def test_https_uri_port(self): del self.environ["HTTP_HOST"] self.environ["SERVER_PORT"] = "443" self.environ["wsgi.url_scheme"] = "https" - self.validate_url("https://127.0.0.1/") + self.validate_url("https://127.0.0.1/", has_host=False) self.environ["SERVER_PORT"] = "8080" - self.validate_url("https://127.0.0.1:8080/") + self.validate_url("https://127.0.0.1:8080/", has_host=False) self.environ["SERVER_PORT"] = "80" - self.validate_url("https://127.0.0.1:80/") + self.validate_url("https://127.0.0.1:80/", has_host=False) def test_http_uri_port(self): del self.environ["HTTP_HOST"] self.environ["SERVER_PORT"] = "80" self.environ["wsgi.url_scheme"] = "http" - self.validate_url("http://127.0.0.1/") + self.validate_url("http://127.0.0.1/", has_host=False) self.environ["SERVER_PORT"] = "8080" - self.validate_url("http://127.0.0.1:8080/") + self.validate_url("http://127.0.0.1:8080/", has_host=False) self.environ["SERVER_PORT"] = "443" - self.validate_url("http://127.0.0.1:443/") + self.validate_url("http://127.0.0.1:443/", has_host=False) def test_request_attributes_with_nonstandard_port_and_no_host(self): del self.environ["HTTP_HOST"] self.environ["SERVER_PORT"] = "8080" - self.validate_url("http://127.0.0.1:8080/") + self.validate_url("http://127.0.0.1:8080/", has_host=False) self.environ["SERVER_PORT"] = "443" - self.validate_url("http://127.0.0.1:443/") - - def test_request_attributes_with_nonstandard_port(self): - self.environ["HTTP_HOST"] += ":8080" - self.validate_url("http://127.0.0.1:8080/") + self.validate_url("http://127.0.0.1:443/", has_host=False) + + def test_request_attributes_with_conflicting_nonstandard_port(self): + self.environ[ + "HTTP_HOST" + ] += ":8080" # Note that we do not correct SERVER_PORT + expected = { + "http.host": "127.0.0.1:8080", + "http.url": "http://127.0.0.1:8080/", + "host.port": 80, + } + self.assertGreaterEqual( + otel_wsgi.collect_request_attributes(self.environ).items(), + expected.items(), + ) def test_request_attributes_with_faux_scheme_relative_raw_uri(self): self.environ["RAW_URI"] = "//127.0.0.1/?" - self.validate_url("http://127.0.0.1//127.0.0.1/?") - - def test_request_attributes_with_pathless_raw_uri(self): - self.environ["PATH_INFO"] = "" - self.environ["RAW_URI"] = "http://hello" - self.environ["HTTP_HOST"] = "hello" - self.validate_url("http://hello") + self.validate_url("http://127.0.0.1//127.0.0.1/?", raw=True) + + def test_request_attributes_pathless(self): + self.environ["RAW_URI"] = "" + expected = {"http.target": ""} + self.assertGreaterEqual( + otel_wsgi.collect_request_attributes(self.environ).items(), + expected.items(), + ) def test_request_attributes_with_full_request_uri(self): self.environ["HTTP_HOST"] = "127.0.0.1:8080" - self.environ["REQUEST_URI"] = "http://127.0.0.1:8080/?foo=bar#top" - self.validate_url("http://127.0.0.1:8080/?foo=bar#top") + self.environ["REQUEST_METHOD"] = "CONNECT" + self.environ[ + "REQUEST_URI" + ] = "127.0.0.1:8080" # Might happen in a CONNECT request + expected = { + "http.host": "127.0.0.1:8080", + "http.target": "127.0.0.1:8080", + } + self.assertGreaterEqual( + otel_wsgi.collect_request_attributes(self.environ).items(), + expected.items(), + ) def test_response_attributes(self): otel_wsgi.add_response_attributes(self.span, "404 Not Found", {})