-
-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ref matching logic and add basic test
- Loading branch information
Showing
4 changed files
with
64 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,57 @@ | ||
from urllib.parse import ParseResult, urlparse | ||
from urllib.parse import urlparse | ||
|
||
from django.http import HttpRequest | ||
|
||
ALLOWED_SCHEMES = ("http", "https") | ||
REFERRER_HEADER = "HTTP_REFERER" # 1 R is the spelling used here: https://docs.djangoproject.com/en/5.1/ref/request-response/ | ||
|
||
|
||
def url_matches(url: ParseResult, target_url: ParseResult) -> bool: | ||
def url_matches(referrer_url: str, target_url: str) -> bool: | ||
""" | ||
Matches a parsed url with a target pattern (also a ParseResult). Checks 3 fields: | ||
* hostname: must be non-empty and equal target.hostname. The left-most subdomain of `url` may be a wildcard (*). | ||
* port: optional for `url`. Must equal target.port, unless it is excluded from target. | ||
* scheme: must be non-empty and equal target.scheme, unless it is excluded from target. | ||
Matches a referrer url with a user-provided one. Checks 3 fields: | ||
* hostname: must equal target.hostname. The left-most subdomain of `url` may be a wildcard (*). | ||
* port: must equal target.port, unless it is excluded from target. | ||
* scheme: must equal target.scheme, unless it is excluded from target. | ||
Note both url's path is ignored. | ||
@param referrer_url: Must have a valid scheme and hostname. | ||
@param target_url: May exclude scheme. | ||
""" | ||
|
||
# We expect hostname exists for both urls. | ||
hostname, target_hostname = url.hostname, target_url.hostname | ||
if hostname.startswith("*."): | ||
hostname = hostname.split(".", 1)[1] | ||
if "." not in target_hostname: | ||
referrer = urlparse(referrer_url) # Always has scheme and hostname | ||
target = urlparse(target_url) | ||
if not target.scheme: # urlparse doesn't work well if scheme is missing | ||
target = urlparse(referrer.scheme + "://" + target_url) | ||
|
||
ref_hostname, target_hostname = referrer.hostname, target.hostname | ||
if ref_hostname.startswith("*."): | ||
ref_hostname = ref_hostname.split(".", 1)[1] | ||
if "." not in target_hostname: # TODO: Could potentially skip | ||
return False | ||
target_hostname = target_hostname.split(".", 1)[1] | ||
|
||
return all( | ||
[ | ||
hostname == target_hostname, | ||
not target_url.port or url.port == target_url.port, | ||
not target_url.scheme or url.scheme == target_url.scheme, | ||
ref_hostname == target_hostname, | ||
not target.port or referrer.port == target.port, | ||
referrer.scheme == target.scheme, | ||
] | ||
) | ||
|
||
|
||
def check_origin(request: HttpRequest, allowed_origins: list[str]) -> tuple[bool, str]: | ||
referrer = request.META.get("HTTP_REFERER") | ||
def validate_scheme_and_origin( | ||
request: HttpRequest, allowed_origins: list[str] | ||
) -> tuple[bool, str]: | ||
referrer: str | None = request.META.get(REFERRER_HEADER) | ||
if referrer: | ||
parsed_ref = urlparse(referrer) | ||
if parsed_ref.scheme not in ALLOWED_SCHEMES: | ||
return False, f"Invalid scheme: {parsed_ref.scheme}" | ||
if not parsed_ref.hostname: | ||
return False, f"Missing hostname in {referrer}" | ||
|
||
for origin in allowed_origins: | ||
parsed_origin = urlparse(origin) | ||
if url_matches(parsed_ref, parsed_origin): | ||
if url_matches(referrer, origin): | ||
return True, f"Matched allowed origin: {origin}" | ||
return False, f"Referrer {referrer} does not match allowed origins." | ||
|
||
return False, "Missing referer header" | ||
return False, f"Missing {REFERRER_HEADER} header" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,33 @@ | ||
# from sentry.toolbar.utils.url import url_matches, check_origin | ||
from sentry.toolbar.utils.url import url_matches | ||
|
||
""" | ||
TODO: | ||
Hostname | ||
- no match | ||
- wildcard (y/n, n for startswith case) | ||
def test_url_matches(): | ||
pass | ||
Port | ||
- not in target | ||
- in target | ||
Scheme | ||
- not in target | ||
- in target | ||
""" | ||
|
||
|
||
def test_url_matches_basic(): | ||
cases = [ | ||
("http://abc.net", "http://abc.net", True), | ||
("http://pocketpair.com", "http://sentry.io", False), | ||
("http://sentry.io/issues", "http://sentry.io", True), | ||
("http://sentry.io", "http://sentry.io/issues", True), | ||
("https://cmu.edu", "https://cmu.com", False), | ||
("https://youtube.com:443/watch?v=3xhb", "https://youtube.com/profile", True), | ||
] | ||
for url, target_url, expected in cases: | ||
assert url_matches(url, target_url) == expected | ||
|
||
|
||
# def test_url_matches_wildcard(): | ||
# pass |
Empty file.