Skip to content

Commit

Permalink
Implement the zyte_api_session_enabled request metadata key
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed May 30, 2024
1 parent 9635994 commit f0a140a
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
11 changes: 11 additions & 0 deletions docs/reference/meta.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,17 @@ string.
See :ref:`retry`.


.. reqmeta:: zyte_api_session_enabled

zyte_api_session_enabled
=========================

Default: :setting:`ZYTE_API_SESSION_ENABLED`

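Whether to use :ref:`scrapy-zyte-api session management <session>` for the
request (``True``) or not (``False``). When set on a request, this key takes
precedence over the :setting:`ZYTE_API_SESSION_ENABLED` setting for that
request.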


.. reqmeta:: zyte_api_session_location

zyte_api_session_location
Expand Down
6 changes: 4 additions & 2 deletions docs/usage/session.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ provide the best of both.
Enabling session management
===========================

To enable session management, set :setting:`ZYTE_API_SESSION_ENABLED` to
``True``.
To enable session management for all requests, set
:setting:`ZYTE_API_SESSION_ENABLED` to ``True``. You can also toggle session
management on or off for specific requests using the
:reqmeta:`zyte_api_session_enabled` request metadata key.

By default, scrapy-zyte-api will maintain up to 8 sessions per domain, each
initialized with a :ref:`browser request <zyte-api-browser>` targeting the URL
Expand Down
21 changes: 13 additions & 8 deletions scrapy_zyte_api/_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import CloseSpider, IgnoreRequest, NotConfigured
from scrapy.exceptions import CloseSpider, IgnoreRequest
from scrapy.http import Response
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.misc import create_instance, load_object
Expand Down Expand Up @@ -622,13 +622,14 @@ def from_crawler(cls, crawler: Crawler):
return cls(crawler)

def __init__(self, crawler: Crawler):
if not crawler.settings.getbool("ZYTE_API_SESSION_ENABLED", False):
raise NotConfigured
self._enabled = crawler.settings.getbool("ZYTE_API_SESSION_ENABLED", False)
self._crawler = crawler
self._sessions = _SessionManager(crawler)

async def process_request(self, request: Request, spider: Spider) -> None:
if self._sessions.is_init_request(request):
if not request.meta.get(
"zyte_api_session_enabled", self._enabled
) or self._sessions.is_init_request(request):
return
try:
await self._sessions.assign(request)
Expand All @@ -655,8 +656,10 @@ async def process_request(self, request: Request, spider: Spider) -> None:
async def process_response(
self, request: Request, response: Response, spider: Spider
) -> Union[Request, Response, None]:
if isinstance(response, DummyResponse) or self._sessions.is_init_request(
request
if (
isinstance(response, DummyResponse)
or not request.meta.get("zyte_api_session_enabled", self._enabled)
or self._sessions.is_init_request(request)
):
return response
try:
Expand Down Expand Up @@ -686,8 +689,10 @@ async def process_response(
def process_exception(
self, request: Request, exception: Exception, spider: Spider
) -> Union[Request, None]:
if not isinstance(exception, RequestError) or self._sessions.is_init_request(
request
if (
not isinstance(exception, RequestError)
or not request.meta.get("zyte_api_session_enabled", self._enabled)
or self._sessions.is_init_request(request)
):
return None

Expand Down

0 comments on commit f0a140a

Please sign in to comment.