Skip to content

Commit

Permalink
Ref matching logic and add basic test
Browse files Browse the repository at this point in the history
  • Loading branch information
aliu39 committed Sep 20, 2024
1 parent 9fb8572 commit a8d8586
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 25 deletions.
51 changes: 31 additions & 20 deletions src/sentry/toolbar/utils/url.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,57 @@
from urllib.parse import ParseResult, urlparse
from urllib.parse import urlparse

from django.http import HttpRequest

ALLOWED_SCHEMES = ("http", "https")
REFERRER_HEADER = "HTTP_REFERER" # 1 R is the spelling used here: https://docs.djangoproject.com/en/5.1/ref/request-response/


def url_matches(url: ParseResult, target_url: ParseResult) -> bool:
def url_matches(referrer_url: str, target_url: str) -> bool:
"""
Matches a parsed url with a target pattern (also a ParseResult). Checks 3 fields:
* hostname: must be non-empty and equal target.hostname. The left-most subdomain of `url` may be a wildcard (*).
* port: optional for `url`. Must equal target.port, unless it is excluded from target.
* scheme: must be non-empty and equal target.scheme, unless it is excluded from target.
Matches a referrer url with a user-provided one. Checks 3 fields:
* hostname: must equal target.hostname. The left-most subdomain of `url` may be a wildcard (*).
* port: must equal target.port, unless it is excluded from target.
* scheme: must equal target.scheme, unless it is excluded from target.
Note both url's path is ignored.
@param referrer_url: Must have a valid scheme and hostname.
@param target_url: May exclude scheme.
"""

# We expect hostname exists for both urls.
hostname, target_hostname = url.hostname, target_url.hostname
if hostname.startswith("*."):
hostname = hostname.split(".", 1)[1]
if "." not in target_hostname:
referrer = urlparse(referrer_url) # Always has scheme and hostname
target = urlparse(target_url)
if not target.scheme: # urlparse doesn't work well if scheme is missing
target = urlparse(referrer.scheme + "://" + target_url)

ref_hostname, target_hostname = referrer.hostname, target.hostname
if ref_hostname.startswith("*."):
ref_hostname = ref_hostname.split(".", 1)[1]
if "." not in target_hostname: # TODO: Could potentially skip
return False
target_hostname = target_hostname.split(".", 1)[1]

return all(
[
hostname == target_hostname,
not target_url.port or url.port == target_url.port,
not target_url.scheme or url.scheme == target_url.scheme,
ref_hostname == target_hostname,
not target.port or referrer.port == target.port,
referrer.scheme == target.scheme,
]
)


def check_origin(request: HttpRequest, allowed_origins: list[str]) -> tuple[bool, str]:
referrer = request.META.get("HTTP_REFERER")
def validate_scheme_and_origin(
request: HttpRequest, allowed_origins: list[str]
) -> tuple[bool, str]:
referrer: str | None = request.META.get(REFERRER_HEADER)
if referrer:
parsed_ref = urlparse(referrer)
if parsed_ref.scheme not in ALLOWED_SCHEMES:
return False, f"Invalid scheme: {parsed_ref.scheme}"
if not parsed_ref.hostname:
return False, f"Missing hostname in {referrer}"

for origin in allowed_origins:
parsed_origin = urlparse(origin)
if url_matches(parsed_ref, parsed_origin):
if url_matches(referrer, origin):
return True, f"Matched allowed origin: {origin}"
return False, f"Referrer {referrer} does not match allowed origins."

return False, "Missing referer header"
return False, f"Missing {REFERRER_HEADER} header"
4 changes: 2 additions & 2 deletions src/sentry/toolbar/views/iframe_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.toolbar.utils.url import check_origin
from sentry.toolbar.utils.url import validate_scheme_and_origin
from sentry.web.frontend.base import OrganizationView, region_silo_view

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -50,7 +50,7 @@ def get(self, request: HttpRequest, organization, project, *args, **kwargs):
) # TODO: replace with 200 response and template var for "project doesn't exist"

allowed_origins: list[str] = project.get_option("sentry:", validate=lambda val: True)
origin_allowed, info_msg = check_origin(request, allowed_origins)
origin_allowed, info_msg = validate_scheme_and_origin(request, allowed_origins)
if not origin_allowed:
return HttpResponse(
info_msg, status=403

Check warning

Code scanning / CodeQL

Reflected server-side cross-site scripting Medium

Cross-site scripting vulnerability due to a
user-provided value
.
Expand Down
34 changes: 31 additions & 3 deletions tests/sentry/toolbar/utils/test_url.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,33 @@
# from sentry.toolbar.utils.url import url_matches, check_origin
from sentry.toolbar.utils.url import url_matches

"""
TODO:
Hostname
- no match
- wildcard (y/n, n for startswith case)
def test_url_matches():
pass
Port
- not in target
- in target
Scheme
- not in target
- in target
"""


def test_url_matches_basic():
cases = [
("http://abc.net", "http://abc.net", True),
("http://pocketpair.com", "http://sentry.io", False),
("http://sentry.io/issues", "http://sentry.io", True),
("http://sentry.io", "http://sentry.io/issues", True),
("https://cmu.edu", "https://cmu.com", False),
("https://youtube.com:443/watch?v=3xhb", "https://youtube.com/profile", True),
]
for url, target_url, expected in cases:
assert url_matches(url, target_url) == expected


# def test_url_matches_wildcard():
# pass
Empty file.

0 comments on commit a8d8586

Please sign in to comment.