From df90e1a5fcaed55b0c89c932ab6e99360e0e8b05 Mon Sep 17 00:00:00 2001 From: wvengen Date: Thu, 15 Feb 2024 14:13:27 +0100 Subject: [PATCH] Add static spider (mostly for testing) --- README.md | 12 +++++++++++- example/spiders/static_spider.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 example/spiders/static_spider.py diff --git a/README.md b/README.md index b99a2af..8b79298 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,15 @@ so that one can get started easily. ## Scraped site -This spider returns quotes from [quotes.toscrape.com](https://quotes.toscrape.com). +This project contains two spiders. The `quotes` spider returns quotes from +[quotes.toscrape.com](https://quotes.toscrape.com). + +The `static` spider returns a single dummy quote without accessing the network. +This can be used for testing. There are several settings and environment variables +that modify its behaviour: +- spider setting `STATIC_TEXT` - quote text (default _To be or not to be_) +- spider setting `STATIC_AUTHOR` - quote author (default _Shakespeare_) +- environment variable `STATIC_TAGS` - quote tags (default _static_) ## Running locally @@ -33,6 +41,7 @@ $ scrapy list ``` > ``` > quotes +> static > ``` ```sh $ scrapy crawl quotes @@ -67,6 +76,7 @@ docker run --rm example scrapy list ``` > ``` > quotes +> static > ``` ```sh diff --git a/example/spiders/static_spider.py b/example/spiders/static_spider.py new file mode 100644 index 0000000..6aea862 --- /dev/null +++ b/example/spiders/static_spider.py @@ -0,0 +1,13 @@ +import os +from scrapy import Spider + +class StaticSpider(Spider): + name = "static" + start_urls = ["file:///dev/null"] + + def parse(self, response): + yield { + 'text': self.settings.get('STATIC_TEXT', 'To be or not to be'), + 'author': self.settings.get('STATIC_AUTHOR', 'Shakespeare'), + 'tags': os.getenv('STATIC_TAGS', 'static') + }