Skip to content

Commit

Permalink
Reduce overhead to check if a host is an IP Address (#9095)
Browse files Browse the repository at this point in the history
  • Loading branch information
bdraco committed Sep 9, 2024
1 parent 2b5a4b9 commit ffcf9dc
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 46 deletions.
1 change: 1 addition & 0 deletions CHANGES/9095.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved performance of checking if a host is an IP Address -- by :user:`bdraco`.
66 changes: 36 additions & 30 deletions aiohttp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
List,
Mapping,
Optional,
Pattern,
Protocol,
Tuple,
Type,
Expand Down Expand Up @@ -484,44 +483,51 @@ def __set__(self, inst: _TSelf[_T], value: _T) -> None:
except ImportError:
pass

# Anchored pattern for a dotted-quad IPv4 address: exactly four
# dot-separated octets, each limited to 0-255 (leading zeros are accepted
# by the ``[01]?[0-9][0-9]?`` alternative).
_ipv4_pattern = (
r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
)
# Anchored pattern for an IPv6 address. The alternatives cover groups of
# 1-4 hex digits separated by ":", forms using "::", and forms ending in an
# embedded dotted-quad IPv4 tail whose octets are limited to 0-255.
# The hex character classes are written upper-case only; case-insensitive
# matching comes from the re.IGNORECASE flag used when compiling below.
_ipv6_pattern = (
r"^(?:(?:(?:[A-F0-9]{1,4}:){6}|(?=(?:[A-F0-9]{0,4}:){0,6}"
r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}$)(([0-9A-F]{1,4}:){0,5}|:)"
r"((:[0-9A-F]{1,4}){1,5}:|:)|::(?:[A-F0-9]{1,4}:){5})"
r"(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}"
r"(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])|(?:[A-F0-9]{1,4}:){7}"
r"[A-F0-9]{1,4}|(?=(?:[A-F0-9]{0,4}:){0,7}[A-F0-9]{0,4}$)"
r"(([0-9A-F]{1,4}:){1,7}|:)((:[0-9A-F]{1,4}){1,7}|:)|(?:[A-F0-9]{1,4}:){7}"
r":|:(:[A-F0-9]{1,4}){7})$"
)
# Each pattern is compiled twice: once as a str pattern and once as a bytes
# pattern (the source text is ASCII-encoded), so str and bytes-like hosts
# can each be matched without converting the host first.
_ipv4_regex = re.compile(_ipv4_pattern)
_ipv6_regex = re.compile(_ipv6_pattern, flags=re.IGNORECASE)
_ipv4_regexb = re.compile(_ipv4_pattern.encode("ascii"))
_ipv6_regexb = re.compile(_ipv6_pattern.encode("ascii"), flags=re.IGNORECASE)

def is_ipv4_address(host: Optional[Union[str, bytes]]) -> bool:
"""Check if host looks like an IPv4 address.
def _is_ip_address(
regex: Pattern[str], regexb: Pattern[bytes], host: Optional[Union[str, bytes]]
) -> bool:
if host is None:
This function does not validate that the format is correct, only that
the host is a str or bytes, and it's all numeric.
This check is only meant as a heuristic to ensure that
a host is not a domain name.
"""
if not host:
return False
# For a host to be an ipv4 address, it must be all numeric.
if isinstance(host, str):
return bool(regex.match(host))
elif isinstance(host, (bytes, bytearray, memoryview)):
return bool(regexb.match(host))
else:
raise TypeError(f"{host} [{type(host)}] is not a str or bytes")
return host.replace(".", "").isdigit()
if isinstance(host, (bytes, bytearray, memoryview)):
return host.decode("ascii").replace(".", "").isdigit()
raise TypeError(f"{host} [{type(host)}] is not a str or bytes")


is_ipv4_address = functools.partial(_is_ip_address, _ipv4_regex, _ipv4_regexb)
is_ipv6_address = functools.partial(_is_ip_address, _ipv6_regex, _ipv6_regexb)
def is_ipv6_address(host: Optional[Union[str, bytes]]) -> bool:
"""Check if host looks like an IPv6 address.
This function does not validate that the format is correct, only that
the host contains a colon and that it is a str or bytes.
This check is only meant as a heuristic to ensure that
a host is not a domain name.
"""
if not host:
return False
# The host must contain a colon to be an IPv6 address.
if isinstance(host, str):
return ":" in host
if isinstance(host, (bytes, bytearray, memoryview)):
return b":" in host
raise TypeError(f"{host} [{type(host)}] is not a str or bytes")


def is_ip_address(host: Optional[Union[str, bytes, bytearray, memoryview]]) -> bool:
    """Return whether *host* looks like an IPv4 or an IPv6 address.

    This is a heuristic only: its purpose is to make sure that a host
    is not a domain name. The IPv4 check is tried first and the IPv6
    check is consulted only when the IPv4 check does not succeed.
    """
    if is_ipv4_address(host):
        return True
    return is_ipv6_address(host)


Expand Down
28 changes: 12 additions & 16 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,6 @@ def test_is_ip_address() -> None:
assert not helpers.is_ip_address("localhost")
assert not helpers.is_ip_address("www.example.com")

# Out of range
assert not helpers.is_ip_address("999.999.999.999")
# Contain a port
assert not helpers.is_ip_address("127.0.0.1:80")
assert not helpers.is_ip_address("[2001:db8:0:1]:80")
# Too many "::"
assert not helpers.is_ip_address("1200::AB00:1234::2552:7777:1313")


def test_is_ip_address_bytes() -> None:
assert helpers.is_ip_address(b"127.0.0.1")
Expand All @@ -304,14 +296,6 @@ def test_is_ip_address_bytes() -> None:
assert not helpers.is_ip_address(b"localhost")
assert not helpers.is_ip_address(b"www.example.com")

# Out of range
assert not helpers.is_ip_address(b"999.999.999.999")
# Contain a port
assert not helpers.is_ip_address(b"127.0.0.1:80")
assert not helpers.is_ip_address(b"[2001:db8:0:1]:80")
# Too many "::"
assert not helpers.is_ip_address(b"1200::AB00:1234::2552:7777:1313")


def test_ipv4_addresses() -> None:
ip_addresses = [
Expand Down Expand Up @@ -360,6 +344,18 @@ def test_is_ip_address_invalid_type() -> None:
with pytest.raises(TypeError):
helpers.is_ip_address(object()) # type: ignore[arg-type]

with pytest.raises(TypeError):
helpers.is_ipv4_address(123) # type: ignore[arg-type]

with pytest.raises(TypeError):
helpers.is_ipv4_address(object()) # type: ignore[arg-type]

with pytest.raises(TypeError):
helpers.is_ipv6_address(123) # type: ignore[arg-type]

with pytest.raises(TypeError):
helpers.is_ipv6_address(object()) # type: ignore[arg-type]


# ----------------------------------- TimeoutHandle -------------------

Expand Down

0 comments on commit ffcf9dc

Please sign in to comment.