Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nodename to response #27

Merged
merged 9 commits into from
Apr 10, 2024
9 changes: 9 additions & 0 deletions kubernetes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ spec:
- containerPort: 6800
name: http
protocol: TCP
env:
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MY_DEPLOYMENT_NAME
valueFrom:
fieldRef:
fieldPath: metadata.labels['app.kubernetes.io/name']
readinessProbe:
failureThreshold: 3
httpGet:
Expand Down
14 changes: 12 additions & 2 deletions scrapyd_k8s/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import uuid

from flask import Flask, request
from flask import Flask, request, Response, jsonify
from flask_basicauth import BasicAuth
from markupsafe import escape
from natsort import natsort_keygen, ns
Expand All @@ -14,6 +14,7 @@
launcher = (config.launcher_cls())(config)
scrapyd_config = config.scrapyd()


vlerkin marked this conversation as resolved.
Show resolved Hide resolved
@app.get("/")
def home():
return "<html><body><h1>scrapyd-k8s</h1></body></html>"
Expand Down Expand Up @@ -102,6 +103,15 @@ def api_listjobs():
# TODO perhaps remove state from jobs
return { 'status': 'ok', 'pending': pending, 'running': running, 'finished': finished }

# middleware that adds "node_name" to each response if it is a JSON
@app.after_request
def after_request(response: Response):
if response.is_json:
data = response.json
data["node_name"] = config.scrapyd().get("node_name", launcher.get_node_name())
response.data = jsonify(data).data
return response

def error(msg, status=200):
return { 'status': 'error', 'message': msg }, status

Expand All @@ -124,4 +134,4 @@ def run():
enable_authentication(app, config_username, config_password)

# run server
app.run(host=host, port=port)
app.run(host=host, port=port)
6 changes: 6 additions & 0 deletions scrapyd_k8s/launcher/docker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import re
import os
import socket

import docker
from ..utils import native_stringify_dict

Expand All @@ -18,6 +21,9 @@ class Docker:
def __init__(self, config):
self._docker = docker.from_env()

def get_node_name(self):
return socket.gethostname()

vlerkin marked this conversation as resolved.
Show resolved Hide resolved
def listjobs(self, project_id=None):
label = self.LABEL_PROJECT + ('=%s'%(project_id) if project_id else '')
jobs = self._docker.containers.list(all=True, filters={ 'label': label })
Expand Down
7 changes: 7 additions & 0 deletions scrapyd_k8s/launcher/k8s.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

import kubernetes
import kubernetes.stream
from signal import Signals
Expand All @@ -23,6 +25,11 @@ def __init__(self, config):
self._k8s = kubernetes.client.CoreV1Api()
self._k8s_batch = kubernetes.client.BatchV1Api()

def get_node_name(self):
deployment = os.getenv('MY_DEPLOYMENT_NAME', 'default')
namespace = os.getenv('MY_NAMESPACE')
return ".".join([n for n in [namespace, deployment] if n])

vlerkin marked this conversation as resolved.
Show resolved Hide resolved
def listjobs(self, project=None):
label = self.LABEL_PROJECT + ('=%s'%(project) if project else '')
jobs = self._k8s_batch.list_namespaced_job(namespace=self._namespace, label_selector=label)
Expand Down
39 changes: 23 additions & 16 deletions test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,37 @@
MAX_WAIT = int(os.getenv('TEST_MAX_WAIT', '6'))
STATIC_SLEEP = float(os.getenv('TEST_STATIC_SLEEP', '2'))


vlerkin marked this conversation as resolved.
Show resolved Hide resolved
def test_root_ok():
response = requests.get(BASE_URL)
assert response.status_code == 200
assert response.headers['Content-Type'] == 'text/html; charset=utf-8'
assert 'scrapyd-k8s' in response.text
assert '</html>' in response.text


def test_healthz_ok():
response = requests.get(BASE_URL + '/healthz')
assert response.status_code == 200


def test_daemonstatus_ok():
response = requests.get(BASE_URL + '/daemonstatus.json')
assert_response_ok(response)
# TODO assert response.json() == { 'status': 'ok', ... }


def test_listprojects_ok():
response = requests.get(BASE_URL + '/listprojects.json')
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'projects': AVAIL_PROJECTS }

json = response.json()
assert json['projects'] == AVAIL_PROJECTS
assert 'node_name' in json
vlerkin marked this conversation as resolved.
Show resolved Hide resolved

def test_listversions_ok():
response = requests.get(BASE_URL + '/listversions.json?project=' + RUN_PROJECT)
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'versions': AVAIL_VERSIONS }

json = response.json()
assert json['versions'] == AVAIL_VERSIONS
assert 'node_name' in json

def test_listversions_project_missing():
response = requests.get(BASE_URL + '/listversions.json')
Expand All @@ -54,16 +55,21 @@ def test_listversions_project_not_found():
response = requests.get(BASE_URL + '/listversions.json?project=' + 'nonexistant')
assert_response_error(response, 404)


def test_listspiders_ok():
response = requests.get(BASE_URL + '/listspiders.json?project=' + RUN_PROJECT + '&_version=' + RUN_VERSION)
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'spiders': AVAIL_SPIDERS }

json = response.json()
assert json['spiders'] == AVAIL_SPIDERS
assert 'node_name' in json

def test_listspiders_ok_without_version():
response = requests.get(BASE_URL + '/listspiders.json?project=' + RUN_PROJECT)
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'spiders': AVAIL_SPIDERS }

json = response.json()
assert json['spiders'] == AVAIL_SPIDERS
assert 'node_name' in json

def test_listspiders_project_missing():
response = requests.get(BASE_URL + '/listspiders.json')
Expand All @@ -77,7 +83,6 @@ def test_listspiders_version_not_found():
response = requests.get(BASE_URL + '/listspiders.json?project=' + RUN_PROJECT + '&_version=' + 'nonexistant')
assert_response_error(response, 404)


def test_schedule_project_missing():
response = requests.post(BASE_URL + '/schedule.json', data={})
assert_response_error(response, 400)
Expand All @@ -93,7 +98,6 @@ def test_schedule_spider_missing():
# scheduling a non-existing spider will try to start it, so no error
# scheduling a non-existing project version will try to start it, so no error


def test_cancel_project_missing():
response = requests.post(BASE_URL + '/cancel.json', data={})
assert_response_error(response, 400)
Expand All @@ -105,8 +109,6 @@ def test_cancel_jobid_missing():
assert_response_error(response, 400)

# TODO test cancel with invalid signal (currently returns server error, could be improved)


def test_scenario_regular_ok():
scenario_regular({
'project': RUN_PROJECT, '_version': RUN_VERSION, 'spider': RUN_SPIDER,
Expand All @@ -120,7 +122,6 @@ def test_scenario_regular_ok_without_version():
})

# TODO test_scenario_cancel_scheduled_ok (needs a way to make sure a job is not running yet)
vlerkin marked this conversation as resolved.
Show resolved Hide resolved

def test_scenario_cancel_running_finished_ok():
assert_listjobs()
# schedule a new job and wait until it is running
Expand All @@ -138,16 +139,22 @@ def test_scenario_cancel_running_finished_ok():
'project': RUN_PROJECT, 'job': jobid, 'signal': 'KILL'
})
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'prevstate': 'running' }

json = response.json()
assert json['prevstate'] == 'running'
assert 'node_name' in json

# wait until the job has stopped
listjobs_wait(jobid, 'finished')
jobinfo = assert_listjobs(finished=jobid)
assert jobinfo == { 'id': jobid, 'project': RUN_PROJECT, 'spider': RUN_SPIDER, 'state': 'finished' }
# then cancel it again, though nothing would happen
response = requests.post(BASE_URL + '/cancel.json', data={ 'project': RUN_PROJECT, 'job': jobid })
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'prevstate': 'finished' }

json = response.json()
assert json['prevstate'] == 'finished'
assert 'node_name' in json

def scenario_regular(schedule_args):
assert_listjobs()
Expand Down
Loading