Skip to content

Commit

Permalink
Test and fix add-on configuration of session retry policies
Browse files — browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Jun 4, 2024
1 parent 4548256 commit e2ecfc9
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 38 deletions.
23 changes: 14 additions & 9 deletions scrapy_zyte_api/addon.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
ScrapyZyteAPISpiderMiddleware,
)

from ._session import SESSION_DEFAULT_RETRY_POLICY
from .handler import _load_retry_policy


def _setdefault(settings, setting, cls, pos):
setting_value = settings[setting]
Expand All @@ -27,18 +24,22 @@ def _setdefault(settings, setting, cls, pos):
settings[setting][cls] = pos


# NOTE: We use import paths instead of the classes because retry policy classes
# are not pickleable (https://github.com/jd/tenacity/issues/147), which is a
# Scrapy requirement
# (https://doc.scrapy.org/en/latest/topics/settings.html#compatibility-with-pickle).
_SESSION_RETRY_POLICIES = {
zyte_api_retrying: SESSION_DEFAULT_RETRY_POLICY,
zyte_api_retrying: "scrapy_zyte_api.SESSION_DEFAULT_RETRY_POLICY",
}

try:
from zyte_api import aggressive_retrying
except ImportError:
pass
else:
from ._session import SESSION_AGGRESSIVE_RETRY_POLICY

_SESSION_RETRY_POLICIES[aggressive_retrying] = SESSION_AGGRESSIVE_RETRY_POLICY
_SESSION_RETRY_POLICIES[aggressive_retrying] = (
"scrapy_zyte_api.SESSION_AGGRESSIVE_RETRY_POLICY"
)


class Addon:
Expand Down Expand Up @@ -116,9 +117,13 @@ def update_settings(self, settings: BaseSettings) -> None:
_setdefault(settings, "SCRAPY_POET_PROVIDERS", ZyteApiProvider, 1100)

if settings.getbool("ZYTE_API_SESSION_ENABLED", False):
retry_policy = _load_retry_policy(settings)
loaded_retry_policy = retry_policy = settings.get(
"ZYTE_API_RETRY_POLICY", "zyte_api.zyte_api_retrying"
)
if retry_policy is not None:
loaded_retry_policy = load_object(retry_policy)
settings.set(
"ZYTE_API_RETRY_POLICY",
_SESSION_RETRY_POLICIES.get(retry_policy, retry_policy),
_SESSION_RETRY_POLICIES.get(loaded_retry_policy, retry_policy),
settings.getpriority("ZYTE_API_RETRY_POLICY"),
)
25 changes: 25 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.utils.misc import load_object
from scrapy.utils.test import get_crawler as _get_crawler
from zyte_api.aio.client import AsyncClient

Expand Down Expand Up @@ -96,6 +97,30 @@ async def make_handler(
await handler._close() # NOQA


def serialize_settings(settings):
    """Return a plain-dict snapshot of *settings* suitable for equality
    comparison between a manually-configured crawler and an add-on-configured
    one.

    Settings that legitimately differ between the two setups are dropped,
    component dicts keyed by import-path strings are re-keyed by the objects
    those strings point to, and download handler instances are replaced by
    their classes (instances would only compare equal by identity).

    The input ``settings`` and its nested dicts are NOT modified.
    """
    result = dict(settings)
    # These settings are expected to differ (or only exist) depending on how
    # the crawler was configured, so they are excluded from the comparison.
    for setting in (
        "ADDONS",
        "ZYTE_API_FALLBACK_HTTP_HANDLER",
        "ZYTE_API_FALLBACK_HTTPS_HANDLER",
    ):
        result.pop(setting, None)
    for setting in (
        "DOWNLOADER_MIDDLEWARES",
        "SCRAPY_POET_PROVIDERS",
        "SPIDER_MIDDLEWARES",
    ):
        if setting in result:
            # Copy before re-keying: ``dict(settings)`` is shallow, and
            # mutating the nested dict in place would corrupt the caller's
            # settings object.
            normalized = dict(result[setting])
            for key in list(normalized):
                if isinstance(key, str):
                    # String keys are import paths; resolve them so dicts
                    # keyed by path and by object compare equal.
                    normalized[load_object(key)] = normalized.pop(key)
            result[setting] = normalized
    if "DOWNLOAD_HANDLERS" in result:
        # Rebuild (rather than mutate) the handlers dict, mapping each
        # handler instance to its class for a comparable representation.
        result["DOWNLOAD_HANDLERS"] = {
            key: value.__class__
            for key, value in result["DOWNLOAD_HANDLERS"].items()
        }
    return result


@contextmanager
def set_env(**env_vars):
old_environ = dict(environ)
Expand Down
32 changes: 4 additions & 28 deletions tests/test_addon.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pytest_twisted import ensureDeferred
from scrapy import Request
from scrapy.core.downloader.handlers.http import HTTP10DownloadHandler
from scrapy.utils.misc import load_object
from scrapy.utils.test import get_crawler

from scrapy_zyte_api import (
Expand All @@ -15,7 +14,7 @@
from scrapy_zyte_api.handler import ScrapyZyteAPIHTTPDownloadHandler

from . import get_crawler as get_crawler_zyte_api
from . import get_download_handler, make_handler
from . import get_download_handler, make_handler, serialize_settings

pytest.importorskip("scrapy.addons")

Expand Down Expand Up @@ -82,34 +81,11 @@ async def test_addon_fallback_explicit():

@ensureDeferred
async def test_addon_matching_settings():
def serialize(settings):
result = dict(settings)
for setting in (
"ADDONS",
"ZYTE_API_FALLBACK_HTTP_HANDLER",
"ZYTE_API_FALLBACK_HTTPS_HANDLER",
):
if setting in settings:
del result[setting]
for setting in (
"DOWNLOADER_MIDDLEWARES",
"SCRAPY_POET_PROVIDERS",
"SPIDER_MIDDLEWARES",
):
if setting in result:
for key in list(result[setting]):
if isinstance(key, str):
obj = load_object(key)
result[setting][obj] = result[setting].pop(key)
for key in result["DOWNLOAD_HANDLERS"]:
result["DOWNLOAD_HANDLERS"][key] = result["DOWNLOAD_HANDLERS"][
key
].__class__
return result

crawler = await get_crawler_zyte_api({"ZYTE_API_TRANSPARENT_MODE": True})
addon_crawler = await get_crawler_zyte_api(use_addon=True)
assert serialize(crawler.settings) == serialize(addon_crawler.settings)
assert serialize_settings(crawler.settings) == serialize_settings(
addon_crawler.settings
)


@ensureDeferred
Expand Down
65 changes: 64 additions & 1 deletion tests/test_sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
)
from scrapy_zyte_api.utils import _RAW_CLASS_SETTING_SUPPORT, _REQUEST_ERROR_HAS_QUERY

from . import get_crawler
from . import get_crawler, serialize_settings

UNSET = object()

Expand Down Expand Up @@ -907,3 +907,66 @@ async def run():
assert outcome is last_outcome
else:
assert not exhausted


# Detect whether this Scrapy version ships the add-on framework; tests that
# rely on it are skipped otherwise.
ADDON_SUPPORT = True
try:
    from scrapy import addons  # noqa: F401
except ImportError:
    ADDON_SUPPORT = False


@pytest.mark.parametrize(
    ("manual_settings", "addon_settings"),
    [
        # No explicit policy with the add-on: presumably the add-on maps its
        # default to the session-aware default policy.
        (
            {"ZYTE_API_RETRY_POLICY": "scrapy_zyte_api.SESSION_DEFAULT_RETRY_POLICY"},
            {},
        ),
        # Plain zyte-api policies should be upgraded to their session-aware
        # counterparts by the add-on.
        (
            {"ZYTE_API_RETRY_POLICY": "scrapy_zyte_api.SESSION_DEFAULT_RETRY_POLICY"},
            {"ZYTE_API_RETRY_POLICY": "zyte_api.zyte_api_retrying"},
        ),
        (
            {
                "ZYTE_API_RETRY_POLICY": "scrapy_zyte_api.SESSION_AGGRESSIVE_RETRY_POLICY"
            },
            {"ZYTE_API_RETRY_POLICY": "zyte_api.aggressive_retrying"},
        ),
        # Already session-aware policies should pass through unchanged.
        (
            {"ZYTE_API_RETRY_POLICY": "scrapy_zyte_api.SESSION_DEFAULT_RETRY_POLICY"},
            {"ZYTE_API_RETRY_POLICY": "scrapy_zyte_api.SESSION_DEFAULT_RETRY_POLICY"},
        ),
        (
            {
                "ZYTE_API_RETRY_POLICY": "scrapy_zyte_api.SESSION_AGGRESSIVE_RETRY_POLICY"
            },
            {
                "ZYTE_API_RETRY_POLICY": "scrapy_zyte_api.SESSION_AGGRESSIVE_RETRY_POLICY"
            },
        ),
        # A custom, unrecognized policy should also pass through unchanged.
        (
            {"ZYTE_API_RETRY_POLICY": "tests.test_sessions.UNSET"},
            {"ZYTE_API_RETRY_POLICY": "tests.test_sessions.UNSET"},
        ),
    ],
)
@ensureDeferred
@pytest.mark.skipif(
    not ADDON_SUPPORT, reason="No add-on support in this version of Scrapy"
)
async def test_addon(manual_settings, addon_settings):
    """Add-on-based configuration must yield the same effective settings as
    the equivalent manual configuration."""
    manual_crawler = await get_crawler(
        {
            "ZYTE_API_TRANSPARENT_MODE": True,
            "ZYTE_API_SESSION_ENABLED": True,
            **manual_settings,
        }
    )
    addon_crawler = await get_crawler(
        {"ZYTE_API_SESSION_ENABLED": True, **addon_settings}, use_addon=True
    )
    via_manual = serialize_settings(manual_crawler.settings)
    via_addon = serialize_settings(addon_crawler.settings)
    assert via_manual == via_addon

0 comments on commit e2ecfc9

Please sign in to comment.