Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Copy known zyte_api_session-prefixed meta into session initialization requests #205

Merged
merged 5 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 7.0.0
rev: 7.1.0
hooks:
- id: flake8
additional_dependencies:
Expand Down
7 changes: 7 additions & 0 deletions docs/usage/session.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ define a separate :ref:`session config override <session-configs>` for each
website, each with its own implementation of
:meth:`~scrapy_zyte_api.SessionConfig.check`.

The :reqmeta:`zyte_api_session_location` and :reqmeta:`zyte_api_session_params`
request metadata keys, if present in a request that triggers a session
initialization request, are copied into that session initialization request,
so that they are available when :setting:`ZYTE_API_SESSION_CHECKER` or
:meth:`~scrapy_zyte_api.SessionConfig.check` is called for the session
initialization request.

If your session checking implementation relies on the response body (e.g. it
uses CSS or XPath expressions), you should make sure that you are getting one,
which might not be the case if you are mostly using :ref:`Zyte API automatic
Expand Down
5 changes: 5 additions & 0 deletions scrapy_zyte_api/_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,11 @@ async def _init_session(self, session_id: str, request: Request, pool: str) -> b
SESSION_INIT_META_KEY: True,
"dont_merge_cookies": True,
"zyte_api": {**session_params, "session": {"id": session_id}},
**{
k: v
for k, v in request.meta.items()
if k in {"zyte_api_session_location", "zyte_api_session_params"}
},
},
callback=NO_CALLBACK,
)
Expand Down
79 changes: 79 additions & 0 deletions tests/test_sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,85 @@ def parse(self, response):
session_config_registry.__init__() # type: ignore[misc]


@ensureDeferred
async def test_session_config_check_meta(mockserver):
    """When initializing a session, known zyte_api_session-prefixed params
    should be included in the session initialization request, so that they can
    be used from check methods validating those requests.

    For example, when validating a location, access to
    zyte_api_session_location may be necessary.

    The spider request also carries an unknown zyte_api_session_foo key; the
    check below only passes for a session initialization request when that key
    is absent from its meta.
    """
    pytest.importorskip("web_poet")

    params = {
        "actions": [
            {
                "action": "setLocation",
                "address": {"postalCode": "10001"},
            }
        ]
    }

    @session_config(["example.com"])
    class CustomSessionConfig(SessionConfig):

        def check(self, response, request):
            # Passes only if all of the following hold:
            # 1. self.location(request) is truthy (presumably derived from
            #    zyte_api_session_location -- confirm against SessionConfig).
            # 2. zyte_api_session_params reached this response's meta intact.
            # 3. Either this is a session initialization request that does
            #    NOT carry the unknown zyte_api_session_foo key, or it is a
            #    request whose meta has zyte_api_session_foo == "bar".
            return (
                bool(self.location(request))
                and response.meta["zyte_api_session_params"] == params
                and (
                    (
                        response.meta.get("_is_session_init_request", False)
                        and "zyte_api_session_foo" not in response.meta
                    )
                    or response.meta["zyte_api_session_foo"] == "bar"
                )
            )

    # From this point on CustomSessionConfig is registered for example.com, so
    # the registry must be reset even if the crawl or the assertion fails;
    # otherwise the override would leak into tests that run afterwards.
    try:
        settings = {
            "RETRY_TIMES": 0,
            "ZYTE_API_URL": mockserver.urljoin("/"),
            "ZYTE_API_SESSION_ENABLED": True,
            "ZYTE_API_SESSION_MAX_BAD_INITS": 1,
        }

        class TestSpider(Spider):
            name = "test"
            start_urls = ["https://example.com"]

            def start_requests(self):
                for url in self.start_urls:
                    yield Request(
                        url,
                        meta={
                            "zyte_api_automap": params,
                            "zyte_api_session_params": params,
                            "zyte_api_session_location": {"postalCode": "10001"},
                            # Unknown session-prefixed key, checked for in
                            # CustomSessionConfig.check above.
                            "zyte_api_session_foo": "bar",
                        },
                    )

            def parse(self, response):
                pass

        crawler = await get_crawler(
            settings, spider_cls=TestSpider, setup_engine=False
        )
        await crawler.crawl()

        session_stats = {
            k: v
            for k, v in crawler.stats.get_stats().items()
            if k.startswith("scrapy-zyte-api/sessions")
        }
        # Exactly one passing check for the init request and one for the
        # regular request; any failed check would add other stats keys.
        assert session_stats == {
            "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1,
            "scrapy-zyte-api/sessions/pools/example.com/use/check-passed": 1,
        }
    finally:
        # Clean up the session config registry.
        session_config_registry.__init__()  # type: ignore[misc]


@ensureDeferred
async def test_session_config_param_error(mockserver):
pytest.importorskip("web_poet")
Expand Down