Merge branch 'main' into task/add-href-to-internal-links--/CDD-2187

A-Ashiq committed Sep 9, 2024
2 parents 33ce7d5 + 86685df commit 7dbc5f1
Showing 3 changed files with 101 additions and 8 deletions.
31 changes: 30 additions & 1 deletion caching/public_api/crawler.py
@@ -1,4 +1,5 @@
 import logging
+from multiprocessing.dummy import Pool as ThreadPool
 from urllib.parse import urljoin
 
 import requests
@@ -25,6 +26,9 @@ def __init__(
         request_timeout: int = DEFAULT_REQUEST_TIMEOUT,
     ):
         self._public_api_base_url = public_api_base_url
+        self._public_api_base_url_v2 = urljoin(
+            base=self._public_api_base_url, url="v2/"
+        )
         self._cdn_auth_key = cdn_auth_key
         self._request_timeout = request_timeout
@@ -229,6 +233,17 @@ def crawl_public_api_themes_path(self) -> None:
         logger.info("Crawling from root URL %s", public_api_themes_root_path)
         self.crawl(url=public_api_themes_root_path, crawled_urls=[])
 
+    def crawl_public_api_themes_path_v2(self) -> None:
+        """Crawls the public API from the root `themes/` path
+
+        Returns:
+            None
+
+        """
+        public_api_themes_root_path = self._build_themes_root_path_v2()
+        logger.info("Crawling from root URL %s", public_api_themes_root_path)
+        self.crawl(url=public_api_themes_root_path, crawled_urls=[])
+
     def _build_themes_root_path(self) -> str:
         """Builds the full URL for the root themes/ path
@@ -239,6 +254,16 @@ def _build_themes_root_path(self) -> str:
 
         """
         return urljoin(base=self._public_api_base_url, url="themes/")
 
+    def _build_themes_root_path_v2(self) -> str:
+        """Builds the full URL for the root themes/ path
+
+        Returns:
+            The full URL for the root themes path
+                which can be passed to requests
+
+        """
+        return urljoin(base=self._public_api_base_url_v2, url="themes/")
+
     def process_all_routes(self) -> None:
         """Crawls the public API in its entirety
@@ -251,4 +276,8 @@ def process_all_routes(self) -> None:
             None
 
         """
-        self.crawl_public_api_themes_path()
+        with ThreadPool() as pool:
+            pool.apply_async(self.crawl_public_api_themes_path)
+            pool.apply_async(self.crawl_public_api_themes_path_v2)
+            pool.close()
+            pool.join()
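The v2 base URL is composed with `urllib.parse.urljoin`, whose trailing-slash rules are easy to trip over: the relative part replaces the last path segment of the base unless the base ends with a slash. A minimal sketch of how the two joins compose (the base URL below is an illustrative stand-in, not the real service URL):

    from urllib.parse import urljoin

    base = "https://api.example.com"            # stand-in for public_api_base_url
    base_v2 = urljoin(base, "v2/")              # "https://api.example.com/v2/"
    print(urljoin(base_v2, "themes/"))          # "https://api.example.com/v2/themes/"

    # Caution: a non-empty path without a trailing slash is replaced, not extended:
    print(urljoin("https://api.example.com/api", "v2/"))  # "https://api.example.com/v2/"

This is why both `"v2/"` and `"themes/"` carry trailing slashes, matching the `f"{FAKE_URL}/v2/themes/"` expectation in the tests, and presumably why the tests below now pass `FAKE_URL` rather than `mock.Mock()` for `public_api_base_url`: `urljoin` needs a real string base.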
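`multiprocessing.dummy.Pool` is the thread-backed counterpart of `multiprocessing.Pool`, so `process_all_routes()` now schedules each crawl on a worker thread and blocks until both finish via `close()`/`join()`. A runnable sketch of the same pattern, with stand-in functions in place of the crawler methods:

    import time
    from multiprocessing.dummy import Pool as ThreadPool  # threads, despite the multiprocessing name

    def crawl_v1() -> None:  # stand-in for crawl_public_api_themes_path
        time.sleep(0.5)
        print("v1 themes crawl finished")

    def crawl_v2() -> None:  # stand-in for crawl_public_api_themes_path_v2
        time.sleep(0.5)
        print("v2 themes crawl finished")

    with ThreadPool() as pool:
        pool.apply_async(crawl_v1)  # returns immediately with an AsyncResult
        pool.apply_async(crawl_v2)
        pool.close()                # stop accepting new work
        pool.join()                 # wait for both crawls to complete

One trade-off of this fire-and-forget style: `apply_async` swallows exceptions unless `get()` is called on the returned `AsyncResult`, so a failure in either crawl would pass silently.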
6 changes: 3 additions & 3 deletions requirements-prod.txt
@@ -23,7 +23,7 @@ drf-nested-routers==0.94.1
 drf-spectacular==0.27.2
 et-xmlfile==1.1.0
 exceptiongroup==1.2.2
-filelock==3.15.4
+filelock==3.16.0
 grimp==3.4.1
 gunicorn==23.0.0
 html5lib==1.1
@@ -55,7 +55,7 @@ pandas==2.2.1
 pathspec==0.12.1
 pbr==6.0.0
 Pillow==10.4.0
-platformdirs==4.2.2
+platformdirs==4.3.2
 plotly==5.24.0
 pluggy==1.5.0
 pyparsing==3.1.4
@@ -85,7 +85,7 @@ tomli==2.0.1
 typing_extensions==4.12.2
 uritemplate==4.1.1
 urllib3==2.2.2
-virtualenv==20.26.3
+virtualenv==20.26.4
 wagtail==6.2.1
 wagtail_trash==3.0.0
 wagtail_modeladmin==2.0.0
72 changes: 68 additions & 4 deletions tests/unit/caching/public_api/test_crawlers.py
@@ -54,7 +54,7 @@ def test_build_base_headers(self):
         # Given
         mocked_cdn_auth_key = mock.Mock()
         crawler = PublicAPICrawler(
-            public_api_base_url=mock.Mock(), cdn_auth_key=mocked_cdn_auth_key
+            public_api_base_url=FAKE_URL, cdn_auth_key=mocked_cdn_auth_key
         )
 
         # When
@@ -74,7 +74,7 @@ def test_build_headers_for_html(self):
         # Given
         mocked_cdn_auth_key = mock.Mock()
         crawler = PublicAPICrawler(
-            public_api_base_url=mock.Mock(), cdn_auth_key=mocked_cdn_auth_key
+            public_api_base_url=FAKE_URL, cdn_auth_key=mocked_cdn_auth_key
         )
 
         # When
@@ -97,7 +97,7 @@ def test_build_headers_for_json(self):
         # Given
         mocked_cdn_auth_key = mock.Mock()
         crawler = PublicAPICrawler(
-            public_api_base_url=mock.Mock(), cdn_auth_key=mocked_cdn_auth_key
+            public_api_base_url=FAKE_URL, cdn_auth_key=mocked_cdn_auth_key
         )
 
         # When
@@ -330,27 +330,91 @@ def test_crawl_public_api_themes_path(self, spy_crawl: mock.MagicMock):
             url=expected_initial_root_path, crawled_urls=[]
         )
 
+    @mock.patch.object(PublicAPICrawler, "crawl")
+    def test_crawl_public_api_themes_path_v2(self, spy_crawl: mock.MagicMock):
+        """
+        Given a URL to crawl
+        When `crawl_public_api_themes_path_v2()` is called
+            from an instance of the `PublicAPICrawler`
+        Then the correct URL is passed to the `crawl()` method
+
+        Patches:
+            `spy_crawl`: For the main assertion,
+                to check that the recursive `crawl` method is called
+                for the correct initial root path
+
+        """
+        # Given
+        base_url = FAKE_URL
+        public_api_crawler = PublicAPICrawler(
+            public_api_base_url=base_url, cdn_auth_key=mock.Mock()
+        )
+
+        # When
+        public_api_crawler.crawl_public_api_themes_path_v2()
+
+        # Then
+        expected_initial_root_path = f"{FAKE_URL}/v2/themes/"
+        spy_crawl.assert_called_once_with(
+            url=expected_initial_root_path, crawled_urls=[]
+        )
+
+    @mock.patch.object(PublicAPICrawler, "crawl_public_api_themes_path_v2")
     @mock.patch.object(PublicAPICrawler, "crawl_public_api_themes_path")
     def test_process_all_routes(
         self,
         spy_crawl_public_api_themes_path: mock.MagicMock,
+        spy_crawl_public_api_themes_path_v2: mock.MagicMock,
         fake_public_api_crawler: PublicAPICrawler,
     ):
         """
         Given no input arguments
         When `process_all_routes()` is called
             from an instance of the `PublicAPICrawler`
-        Then a call is delegated to the `crawl_public_api_themes_path()`
+        Then a call is delegated to the correct methods
 
         Patches:
             `spy_crawl_public_api_themes_path`: For the main assertion
+            `spy_crawl_public_api_themes_path_v2`: For the main assertion
+                to check the v2 API is also being crawled
 
         """
         # Given / When
         fake_public_api_crawler.process_all_routes()
 
         # Then
         spy_crawl_public_api_themes_path.assert_called_once()
+        spy_crawl_public_api_themes_path_v2.assert_called_once()
+
@mock.patch(f"{MODULE_PATH}.ThreadPool")
def test_process_all_routes_processes_both_apis_in_seperate_threads(
self, spy_thread_pool: mock.MagicMock, fake_public_api_crawler: PublicAPICrawler
):
"""
Given no input arguments
When `process_all_routes()` is called
from an instance of the `PublicAPICrawler`
Then the different API versions are processed
within individual threads
"""
# Given / When
fake_public_api_crawler.process_all_routes()

# Then
spy_thread_pool_in_context_manager = mock.call().__enter__()
expected_calls = [
spy_thread_pool_in_context_manager,
spy_thread_pool_in_context_manager.apply_async(
fake_public_api_crawler.crawl_public_api_themes_path
),
spy_thread_pool_in_context_manager.apply_async(
fake_public_api_crawler.crawl_public_api_themes_path_v2
),
spy_thread_pool_in_context_manager.close(),
spy_thread_pool_in_context_manager.join(),
mock.call().__exit__(None, None, None),
]
spy_thread_pool.assert_has_calls(calls=expected_calls, any_order=False)


 class TestPublicAPICrawlerCrawlMethod:
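The assertion style in `test_process_all_routes_processes_both_apis_in_separate_threads` leans on how `mock.call` records chained usage: `mock.call().__enter__()` means "the patched `ThreadPool` was called, and its result was entered as a context manager", and attribute calls on that object extend the recorded chain. A self-contained sketch of the same idiom:

    from unittest import mock

    spy_pool_class = mock.MagicMock()  # plays the role of the patched ThreadPool

    with spy_pool_class() as pool:     # records call(), then call().__enter__()
        pool.apply_async("some-task")  # argument is arbitrary here; the pool is a mock

    entered = mock.call().__enter__()
    expected_calls = [
        entered,                           # the pool was entered
        entered.apply_async("some-task"),  # then used inside the with block
        mock.call().__exit__(None, None, None),
    ]
    spy_pool_class.assert_has_calls(expected_calls, any_order=False)

Because `assert_has_calls` checks for a contiguous, ordered subsequence of `mock_calls` by default, the test pins down not just that `apply_async`, `close()` and `join()` happened, but that they happened inside the `with` block and in that order.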
