Skip to content

Commit

Permalink
Index resources in the UrlDispatcher to avoid linear search for most …
Browse files Browse the repository at this point in the history
…cases (#7829)
  • Loading branch information
bdraco committed Nov 27, 2023
1 parent 40a5197 commit 2a3eaa1
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 24 deletions.
3 changes: 3 additions & 0 deletions CHANGES/7829.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Improved URL handler resolution time by indexing resources in the UrlDispatcher.
For applications with a large number of handlers, this should increase performance significantly.
-- by :user:`bdraco`
80 changes: 67 additions & 13 deletions aiohttp/web_urldispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,13 +716,20 @@ class PrefixedSubAppResource(PrefixResource):
def __init__(self, prefix: str, app: "Application") -> None:
super().__init__(prefix)
self._app = app
for resource in app.router.resources():
resource.add_prefix(prefix)
self._add_prefix_to_resources(prefix)

def add_prefix(self, prefix: str) -> None:
super().add_prefix(prefix)
for resource in self._app.router.resources():
self._add_prefix_to_resources(prefix)

def _add_prefix_to_resources(self, prefix: str) -> None:
router = self._app.router
for resource in router.resources():
# Since the canonical path of a resource is about
# to change, we need to unindex it and then reindex
router.unindex_resource(resource)
resource.add_prefix(prefix)
router.index_resource(resource)

def url_for(self, *args: str, **kwargs: str) -> URL:
raise RuntimeError(".url_for() is not supported " "by sub-application root")
Expand All @@ -731,11 +738,6 @@ def get_info(self) -> _InfoDict:
return {"app": self._app, "prefix": self._prefix}

async def resolve(self, request: Request) -> _Resolve:
if (
not request.url.raw_path.startswith(self._prefix2)
and request.url.raw_path != self._prefix
):
return None, set()
match_info = await self._app.router.resolve(request)
match_info.add_app(self._app)
if isinstance(match_info.http_exception, HTTPMethodNotAllowed):
Expand Down Expand Up @@ -974,27 +976,55 @@ def __contains__(self, route: object) -> bool:

class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]):
NAME_SPLIT_RE = re.compile(r"[.:-]")
HTTP_NOT_FOUND = HTTPNotFound()

def __init__(self) -> None:
super().__init__()
self._resources: List[AbstractResource] = []
self._named_resources: Dict[str, AbstractResource] = {}
self._resource_index: dict[str, list[AbstractResource]] = {}
self._matched_sub_app_resources: List[MatchedSubAppResource] = []

async def resolve(self, request: Request) -> UrlMappingMatchInfo:
method = request.method
resource_index = self._resource_index
allowed_methods: Set[str] = set()

for resource in self._resources:
# Walk the url parts looking for candidates. We walk the url backwards
# to ensure the most explicit match is found first. If there are multiple
# candidates for a given url part because there are multiple resources
# registered for the same canonical path, we resolve them in a linear
# fashion to ensure registration order is respected.
url_part = request.rel_url.raw_path
while url_part:
for candidate in resource_index.get(url_part, ()):
match_dict, allowed = await candidate.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed
if url_part == "/":
break
url_part = url_part.rpartition("/")[0] or "/"

#
# We didn't find any candidates, so we'll try the matched sub-app
# resources which we have to walk in a linear fashion because they
# have regex/wildcard match rules and we cannot index them.
#
# For most cases we do not expect there to be many of these since
# currently they are only added by `add_domain`
#
for resource in self._matched_sub_app_resources:
match_dict, allowed = await resource.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed

if allowed_methods:
return MatchInfoError(HTTPMethodNotAllowed(method, allowed_methods))
else:
return MatchInfoError(HTTPNotFound())
return MatchInfoError(HTTPMethodNotAllowed(request.method, allowed_methods))

return MatchInfoError(self.HTTP_NOT_FOUND)

def __iter__(self) -> Iterator[str]:
return iter(self._named_resources)
Expand Down Expand Up @@ -1050,6 +1080,30 @@ def register_resource(self, resource: AbstractResource) -> None:
self._named_resources[name] = resource
self._resources.append(resource)

if isinstance(resource, MatchedSubAppResource):
# We cannot index match sub-app resources because they have match rules
self._matched_sub_app_resources.append(resource)
else:
self.index_resource(resource)

def _get_resource_index_key(self, resource: AbstractResource) -> str:
"""Return a key to index the resource in the resource index."""
# strip at the first { to allow for variables
return resource.canonical.partition("{")[0].rstrip("/") or "/"

def index_resource(self, resource: AbstractResource) -> None:
"""Add a resource to the resource index."""
resource_key = self._get_resource_index_key(resource)
# There may be multiple resources for a canonical path
# so we keep them in a list to ensure that registration
# order is respected.
self._resource_index.setdefault(resource_key, []).append(resource)

def unindex_resource(self, resource: AbstractResource) -> None:
"""Remove a resource from the resource index."""
resource_key = self._get_resource_index_key(resource)
self._resource_index[resource_key].remove(resource)

def add_resource(self, path: str, *, name: Optional[str] = None) -> Resource:
if path and not path.startswith("/"):
raise ValueError("path should be started with / or be empty")
Expand Down
39 changes: 30 additions & 9 deletions docs/web_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1865,20 +1865,38 @@ unique *name* and at least one :term:`route`.

:term:`web-handler` lookup is performed in the following way:

1. Router iterates over *resources* one-by-one.
2. If *resource* matches to requested URL the resource iterates over
own *routes*.
3. If route matches to requested HTTP method (or ``'*'`` wildcard) the
route's handler is used as found :term:`web-handler`. The lookup is
finished.
4. Otherwise router tries next resource from the *routing table*.
5. If the end of *routing table* is reached and no *resource* /
*route* pair found the *router* returns special :class:`~aiohttp.abc.AbstractMatchInfo`
1. The router splits the URL and checks the index from longest to shortest.
For example, '/one/two/three' will first check the index for
'/one/two/three', then '/one/two' and finally '/'.
2. If the URL part is found in the index, the list of routes for
that URL part is iterated over. If a route matches to requested HTTP
method (or ``'*'`` wildcard) the route's handler is used as the chosen
:term:`web-handler`. The lookup is finished.
3. If the route is not found in the index, the router tries to find
the route in the list of :class:`~aiohttp.web.MatchedSubAppResource`,
(current only created from :meth:`~aiohttp.web.Application.add_domain`),
and will iterate over the list of
:class:`~aiohttp.web.MatchedSubAppResource` in a linear fashion
until a match is found.
4. If no *resource* / *route* pair was found, the *router*
returns the special :class:`~aiohttp.abc.AbstractMatchInfo`
instance with :attr:`aiohttp.abc.AbstractMatchInfo.http_exception` is not ``None``
but :exc:`HTTPException` with either *HTTP 404 Not Found* or
*HTTP 405 Method Not Allowed* status code.
Registered :meth:`~aiohttp.abc.AbstractMatchInfo.handler` raises this exception on call.

Fixed paths are preferred over variable paths. For example,
if you have two routes ``/a/b`` and ``/a/{name}``, then the first
route will always be preferred over the second one.

If there are multiple dynamic paths with the same fixed prefix,
they will be resolved in order of registration.

For example, if you have two dynamic routes that are prefixed
with the fixed ``/users`` path such as ``/users/{x}/{y}/z`` and
``/users/{x}/y/z``, the first one will be preferred over the
second one.

User should never instantiate resource classes but give it by
:meth:`UrlDispatcher.add_resource` call.

Expand All @@ -1900,7 +1918,10 @@ Resource classes hierarchy::
Resource
PlainResource
DynamicResource
PrefixResource
StaticResource
PrefixedSubAppResource
MatchedSubAppResource


.. class:: AbstractResource
Expand Down
7 changes: 7 additions & 0 deletions tests/test_urldispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,10 +1264,17 @@ async def test_prefixed_subapp_overlap(app: Any) -> None:
subapp2.router.add_get("/b", handler2)
app.add_subapp("/ss", subapp2)

subapp3 = web.Application()
handler3 = make_handler()
subapp3.router.add_get("/c", handler3)
app.add_subapp("/s/s", subapp3)

match_info = await app.router.resolve(make_mocked_request("GET", "/s/a"))
assert match_info.route.handler is handler1
match_info = await app.router.resolve(make_mocked_request("GET", "/ss/b"))
assert match_info.route.handler is handler2
match_info = await app.router.resolve(make_mocked_request("GET", "/s/s/c"))
assert match_info.route.handler is handler3


async def test_prefixed_subapp_empty_route(app: Any) -> None:
Expand Down
86 changes: 84 additions & 2 deletions tests/test_web_urldispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from aiohttp import web
from aiohttp.pytest_plugin import AiohttpClient
from aiohttp.web_urldispatcher import SystemRoute
from aiohttp.web_urldispatcher import Resource, SystemRoute


@pytest.mark.parametrize(
Expand Down Expand Up @@ -142,7 +142,6 @@ async def test_access_to_the_file_with_spaces(
r = await client.get(url)
assert r.status == 200
assert (await r.text()) == data
await r.release()


async def test_access_non_existing_resource(
Expand Down Expand Up @@ -544,3 +543,86 @@ async def handler(request: web.Request) -> web.Response:
r = await client.get(yarl.URL(urlencoded_path, encoded=True))
assert r.status == expected_http_resp_status
await r.release()


async def test_order_is_preserved(aiohttp_client: AiohttpClient) -> None:
"""Test route order is preserved.
Note that fixed/static paths are always preferred over a regex path.
"""
app = web.Application()

async def handler(request: web.Request) -> web.Response:
assert isinstance(request.match_info._route.resource, Resource)
return web.Response(text=request.match_info._route.resource.canonical)

app.router.add_get("/first/x/{b}/", handler)
app.router.add_get(r"/first/{x:.*/b}", handler)

app.router.add_get(r"/second/{user}/info", handler)
app.router.add_get("/second/bob/info", handler)

app.router.add_get("/third/bob/info", handler)
app.router.add_get(r"/third/{user}/info", handler)

app.router.add_get(r"/forth/{name:\d+}", handler)
app.router.add_get("/forth/42", handler)

app.router.add_get("/fifth/42", handler)
app.router.add_get(r"/fifth/{name:\d+}", handler)

client = await aiohttp_client(app)

r = await client.get("/first/x/b/")
assert r.status == 200
assert await r.text() == "/first/x/{b}/"

r = await client.get("/second/frank/info")
assert r.status == 200
assert await r.text() == "/second/{user}/info"

# Fixed/static paths are always preferred over regex paths
r = await client.get("/second/bob/info")
assert r.status == 200
assert await r.text() == "/second/bob/info"

r = await client.get("/third/bob/info")
assert r.status == 200
assert await r.text() == "/third/bob/info"

r = await client.get("/third/frank/info")
assert r.status == 200
assert await r.text() == "/third/{user}/info"

r = await client.get("/forth/21")
assert r.status == 200
assert await r.text() == "/forth/{name}"

# Fixed/static paths are always preferred over regex paths
r = await client.get("/forth/42")
assert r.status == 200
assert await r.text() == "/forth/42"

r = await client.get("/fifth/21")
assert r.status == 200
assert await r.text() == "/fifth/{name}"

r = await client.get("/fifth/42")
assert r.status == 200
assert await r.text() == "/fifth/42"


async def test_url_with_many_slashes(aiohttp_client: AiohttpClient) -> None:
app = web.Application()

class MyView(web.View):
async def get(self) -> web.Response:
return web.Response()

app.router.add_routes([web.view("/a", MyView)])

client = await aiohttp_client(app)

r = await client.get("///a")
assert r.status == 200
await r.release()

0 comments on commit 2a3eaa1

Please sign in to comment.