Add job start and end time #30

Draft · wants to merge 8 commits into base: main
2 changes: 1 addition & 1 deletion .github/workflows/test-docker.yml
@@ -34,4 +34,4 @@ jobs:
         curl http://localhost:6800/daemonstatus.json
 
     - name: Run tests
-      run: pytest -vv test_api.py
+      run: pytest -vv scrapyd_k8s/tests/
2 changes: 1 addition & 1 deletion .github/workflows/test-k8s.yml
@@ -44,4 +44,4 @@ jobs:
      run: |
        TEST_MAX_WAIT=60 \
        TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \
-       pytest -vv --color=yes test_api.py
+       pytest -vv --color=yes scrapyd_k8s/tests/
2 changes: 1 addition & 1 deletion .github/workflows/test-manifest.yml
@@ -58,4 +58,4 @@ jobs:
        TEST_BASE_URL=`minikube service scrapyd-k8s --url` \
        TEST_MAX_WAIT=60 \
        TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \
-       pytest -vv --color=yes test_api.py
+       pytest -vv --color=yes scrapyd_k8s/tests/
2 changes: 1 addition & 1 deletion README.md
@@ -151,7 +151,7 @@ curl http://localhost:6800/listjobs.json
 {
   "finished":[],
   "pending":[],
-  "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running"}],
+  "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running", "start_time": "2012-09-12 10:14:03.594664"}],
   "status":"ok"
 }
 ```
Empty file added scrapyd_k8s/__init__.py
30 changes: 19 additions & 11 deletions scrapyd_k8s/launcher/docker.py
@@ -1,9 +1,9 @@
 import re
 import os
 import socket
 
 import docker
-from ..utils import native_stringify_dict
 
+from ..utils import format_iso_date_string, native_stringify_dict
 
 class Docker:
 
@@ -66,21 +66,29 @@ def cancel(self, project_id, job_id, signal):
         c.kill(signal='SIG' + signal)
         return prevstate
 
-    def _parse_job(self, c):
-        return {
-            'id': c.labels.get(self.LABEL_JOB_ID),
-            'state': self._docker_to_scrapyd_status(c.status),
-            'project': c.labels.get(self.LABEL_PROJECT),
-            'spider': c.labels.get(self.LABEL_SPIDER)
+    def _parse_job(self, container):
+        state = self._docker_to_scrapyd_status(container.status)
+        job = {
+            'id': container.labels.get(self.LABEL_JOB_ID),
+            'state': state,
+            'project': container.labels.get(self.LABEL_PROJECT),
+            'spider': container.labels.get(self.LABEL_SPIDER),
         }
 
-    def _get_container(self, project_id, job_id):
-        filters = { 'label': self.LABEL_JOB_ID + '=' + job_id }
+        if state in ['running', 'finished']:
+            job['start_time'] = format_iso_date_string(container.attrs['State']['StartedAt'])
+        if state == 'finished':
+            job['end_time'] = format_iso_date_string(container.attrs['State']['FinishedAt'])
+
+        return job
+
+    def _get_container(self, project_id: str, job_id: str):
+        filters = {'label': self.LABEL_JOB_ID + '=' + job_id}
         c = self._docker.containers.list(all=True, filters=filters)
         if not c:
             return None
-        c = c[0]
 
+        c = c[0]
         if c.labels.get(self.LABEL_PROJECT) != project_id:
             return None
 
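For a finished container, the reworked `_parse_job` now adds both timestamps taken from the container's `State` attributes. A minimal sketch of the resulting job dictionary (values are made up for illustration):

```python
# Sketch only: shape of the dict returned by the new Docker _parse_job for a
# finished container; the timestamp values below are illustrative, not real output.
job = {
    'id': 'e9b81fccbec211eeb3b109f30f136c01',
    'state': 'finished',
    'project': 'example',
    'spider': 'quotes',
    'start_time': '2024-08-30 13:45:30.123456',  # from container.attrs['State']['StartedAt']
    'end_time': '2024-08-30 13:46:10.654321',    # from container.attrs['State']['FinishedAt']
}
```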
27 changes: 18 additions & 9 deletions scrapyd_k8s/launcher/k8s.py
@@ -1,11 +1,10 @@
 import os
+from signal import Signals
 
 import kubernetes
 import kubernetes.stream
-from signal import Signals
 from subprocess import check_output, CalledProcessError
 
-from ..utils import native_stringify_dict
+from ..utils import format_datetime_object, native_stringify_dict
 
 class K8s:
 
@@ -122,13 +121,24 @@ def cancel(self, project, job_id, signal):
         return prevstate
 
     def _parse_job(self, job):
-        return {
+        state = self._k8s_job_to_scrapyd_status(job)
+        _job = {
             'id': job.metadata.labels.get(self.LABEL_JOB_ID),
-            'state': self._k8s_job_to_scrapyd_status(job),
+            'state': state,
             'project': job.metadata.labels.get(self.LABEL_PROJECT),
-            'spider': job.metadata.labels.get(self.LABEL_SPIDER)
+            'spider': job.metadata.labels.get(self.LABEL_SPIDER),
         }
 
+        if state in ['running', 'finished']:
+            _job['start_time'] = format_datetime_object(job.status.start_time)
+        if state == 'finished':
+            # Finished is mapped to succeeded and pending. Only when a job has succeeded will it have a
+            # completion time.
+            completion_time = job.status.completion_time
+            _job['end_time'] = format_datetime_object(completion_time) if completion_time else None
+
+        return _job
+
     def _get_job(self, project, job_id):
         label = self.LABEL_JOB_ID + '=' + job_id
         r = self._k8s_batch.list_namespaced_job(namespace=self._namespace, label_selector=label)
@@ -170,13 +180,12 @@ def _k8s_job_name(self, project, job_id):
 
     def _k8s_kill(self, pod_name, signal):
         # exec needs stream, which modifies client, so use separate instance
-        k8s = kubernetes.client.CoreV1Api()
         resp = kubernetes.stream.stream(
-            k8s.connect_get_namespaced_pod_exec,
+            self._k8s.connect_get_namespaced_pod_exec,
             pod_name,
             namespace=self._namespace,
             # this is a bit blunt, bit it works and is usually available
             command=['/usr/sbin/killall5', '-' + str(signal)],
             stderr=True
         )
-        # TODO figure out how to get return value
+        # TODO figure out how to get return value
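As the added comment notes, a Kubernetes job only carries `completion_time` once it has succeeded, so `end_time` can come back as `None` even when the state is reported as `finished`. A small sketch of how a caller might guard against that (the helper name and formatting are hypothetical, not part of this change):

```python
# Hypothetical consumer-side helper: tolerate a 'finished' job whose end_time is None.
def describe_job(job):
    start = job.get('start_time', 'unknown')
    end = job.get('end_time') or 'not recorded'
    return f"{job['id']}: {job['state']} (started {start}, ended {end})"
```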
Empty file added scrapyd_k8s/tests/__init__.py
1 change: 0 additions & 1 deletion test_api.py → scrapyd_k8s/tests/test_api.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 import os
-import pytest
 import requests
 import time
 
18 changes: 18 additions & 0 deletions scrapyd_k8s/tests/test_utils.py
@@ -0,0 +1,18 @@
+from datetime import datetime
+import pytest
+from scrapyd_k8s.utils import format_datetime_object, format_iso_date_string
+
+
+def test_format_iso_date_string():
+    input_date = "2024-08-30T13:45:30.123456"
+    expected = "2024-08-30 13:45:30.123456"
+    assert format_iso_date_string(input_date) == expected
+
+def test_format_iso_date_string_invalid():
+    with pytest.raises(ValueError):
+        format_iso_date_string("invalid_date")
+
+def test_format_datetime_object():
+    input_datetime = datetime(2024, 8, 30, 13, 45, 30, 123456)
+    expected = "2024-08-30 13:45:30.123456"
+    assert format_datetime_object(input_datetime) == expected
13 changes: 12 additions & 1 deletion scrapyd_k8s/utils.py
@@ -1,3 +1,8 @@
+from datetime import datetime
+
+
+TIME_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
+
 def _to_native_str(text, encoding="utf-8", errors="strict"):
     if isinstance(text, str):
         return text
@@ -25,4 +30,10 @@ def native_stringify_dict(dct_or_tuples, encoding="utf-8", keys_only=True):
         else:
             v = _to_native_str(v, encoding)
         d[k] = v
-    return d
+    return d
+
+def format_iso_date_string(date_string):
+    return datetime.fromisoformat(date_string).strftime(TIME_FORMAT)
+
+def format_datetime_object(datetime_obj):
+    return datetime_obj.strftime(TIME_FORMAT)
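The two helpers cover the two input shapes the launchers receive: the Docker launcher gets `StartedAt`/`FinishedAt` as ISO strings, while the Kubernetes client exposes `start_time`/`completion_time` as `datetime` objects. A quick usage sketch (inputs are illustrative; a non-ISO string raises `ValueError`, as exercised by `test_format_iso_date_string_invalid`):

```python
from datetime import datetime

from scrapyd_k8s.utils import format_datetime_object, format_iso_date_string

# ISO string input, as received from the Docker container state
print(format_iso_date_string("2024-08-30T13:45:30.123456"))
# -> 2024-08-30 13:45:30.123456

# datetime input, as received from the Kubernetes job status
print(format_datetime_object(datetime(2024, 8, 30, 13, 45, 30, 123456)))
# -> 2024-08-30 13:45:30.123456

try:
    format_iso_date_string("not-a-date")
except ValueError:
    print("non-ISO input raises ValueError")
```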