Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more cpu metrics #2595

Merged
merged 5 commits into from
Nov 22, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions kubelet/datadog_checks/kubelet/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ def __init__(self, *args, **kwargs):

self.CADVISOR_METRIC_TRANSFORMERS = {
'container_cpu_usage_seconds_total': self.container_cpu_usage_seconds_total,
'container_cpu_load_average_10s': self.container_cpu_load_average_10s,
'container_cpu_system_seconds_total': self.container_cpu_system_seconds_total,
'container_cpu_user_seconds_total': self.container_cpu_user_seconds_total,
'container_cpu_cfs_throttled_periods_total': self.container_cpu_cfs_throttled_periods_total,
'container_cpu_cfs_throttled_seconds_total': self.container_cpu_cfs_throttled_seconds_total,
'container_fs_reads_bytes_total': self.container_fs_reads_bytes_total,
'container_fs_writes_bytes_total': self.container_fs_writes_bytes_total,
'container_network_receive_bytes_total': self.container_network_receive_bytes_total,
Expand All @@ -45,7 +50,7 @@ def __init__(self, *args, **kwargs):
'container_memory_usage_bytes': self.container_memory_usage_bytes,
'container_memory_working_set_bytes': self.container_memory_working_set_bytes,
'container_memory_rss': self.container_memory_rss,
'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes
'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes,
}

def _create_cadvisor_prometheus_instance(self, instance):
Expand All @@ -63,11 +68,6 @@ def _create_cadvisor_prometheus_instance(self, instance):
'prometheus_url': instance.get('cadvisor_metrics_endpoint', 'dummy_url/cadvisor'),
'ignore_metrics': [
'container_cpu_cfs_periods_total',
'container_cpu_cfs_throttled_periods_total',
'container_cpu_cfs_throttled_seconds_total',
'container_cpu_load_average_10s',
'container_cpu_system_seconds_total',
'container_cpu_user_seconds_total',
'container_fs_inodes_free',
'container_fs_inodes_total',
'container_fs_io_current',
Expand Down Expand Up @@ -254,7 +254,6 @@ def _sum_values_by_context(metric, uid_from_labels):
)
# TODO
# metric.Clear() # Ignore this metric message

return seen

def _process_container_metric(self, type, metric_name, metric, scraper_config):
Expand Down Expand Up @@ -384,9 +383,28 @@ def container_cpu_usage_seconds_total(self, metric, scraper_config):
# Replacing the sample tuple to convert cores in nano cores
metric.samples[i] = (sample[self.SAMPLE_NAME], sample[self.SAMPLE_LABELS],
sample[self.SAMPLE_VALUE] * 10. ** 9)

self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_cpu_load_average_10s(self, metric, scraper_config):
    """Forward the cAdvisor `container_cpu_load_average_10s` metric as a gauge.

    The metric is published as `<namespace>.cpu.load.10s.avg`, where the
    namespace is read from `scraper_config['namespace']`.
    """
    namespace = scraper_config['namespace']
    self._process_container_metric(
        'gauge', namespace + '.cpu.load.10s.avg', metric, scraper_config
    )

def container_cpu_system_seconds_total(self, metric, scraper_config):
    """Submit `container_cpu_system_seconds_total` as `<namespace>.cpu.system.total`.

    The cAdvisor source is a cumulative counter of CPU seconds, so it is
    submitted as a rate (like `container_cpu_usage_seconds_total`) rather
    than as a gauge, which would only ever report a growing total.

    NOTE(review): unlike `container_cpu_usage_seconds_total`, no conversion
    to nanocores is applied here, so the rate is in cores -- confirm the
    unit declared in metadata.csv matches.
    """
    metric_name = scraper_config['namespace'] + '.cpu.system.total'
    self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_cpu_user_seconds_total(self, metric, scraper_config):
    """Submit `container_cpu_user_seconds_total` as `<namespace>.cpu.user.total`.

    The cAdvisor source is a cumulative counter of CPU seconds, so it is
    submitted as a rate (like `container_cpu_usage_seconds_total`) rather
    than as a gauge, which would only ever report a growing total.

    NOTE(review): unlike `container_cpu_usage_seconds_total`, no conversion
    to nanocores is applied here, so the rate is in cores -- confirm the
    unit declared in metadata.csv matches.
    """
    metric_name = scraper_config['namespace'] + '.cpu.user.total'
    self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_cpu_cfs_throttled_periods_total(self, metric, scraper_config):
    """Submit `container_cpu_cfs_throttled_periods_total` as `<namespace>.cpu.cfs.throttled.periods`.

    The cAdvisor source is a cumulative count of throttled CFS periods, so
    it is submitted as a rate rather than as a gauge: a gauge of the running
    total cannot show when throttling is actually happening.
    """
    metric_name = scraper_config['namespace'] + '.cpu.cfs.throttled.periods'
    self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_cpu_cfs_throttled_seconds_total(self, metric, scraper_config):
    """Submit `container_cpu_cfs_throttled_seconds_total` as `<namespace>.cpu.cfs.throttled.seconds`.

    The cAdvisor source is a cumulative counter of seconds spent throttled,
    so it is submitted as a rate rather than as a gauge: a gauge of the
    running total cannot show when throttling is actually happening.
    """
    metric_name = scraper_config['namespace'] + '.cpu.cfs.throttled.seconds'
    self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_fs_reads_bytes_total(self, metric, scraper_config):
metric_name = scraper_config['namespace'] + '.io.read_bytes'
self._process_container_metric('rate', metric_name, metric, scraper_config)
Expand Down
5 changes: 5 additions & 0 deletions kubelet/metadata.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name
kubernetes.cpu.load.10s.avg,gauge,,,,Container cpu load average over the last 10 seconds,0,kubelet,k8s.cpu.load.10s
kubernetes.cpu.system.total,gauge,,nanocore,,The number of cores used for system time,0,kubelet,k8s.cpu.system
kubernetes.cpu.user.total,gauge,,nanocore,,The number of cores used for user time,0,kubelet,k8s.cpu.user
kubernetes.cpu.cfs.throttled.periods,gauge,,,,Number of throttled period intervals,-1,kubelet,k8s.cpu.throttled.periods
kubernetes.cpu.cfs.throttled.seconds,gauge,,,,Total time duration the container has been throttled,-1,kubelet,k8s.cpu.throttled.sec
hkaj marked this conversation as resolved.
Show resolved Hide resolved
kubernetes.cpu.capacity,gauge,,core,,The number of cores in this machine,0,kubelet,k8s.cpu.capacity
kubernetes.cpu.usage.total,gauge,,nanocore,,The number of cores used,-1,kubelet,k8s.cpu
kubernetes.cpu.limits,gauge,,core,,The limit of cpu cores set,0,kubelet,k8s.cpu.limits
Expand Down
9 changes: 7 additions & 2 deletions kubelet/tests/test_kubelet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,15 @@
'kubernetes.memory.working_set',
'kubernetes.memory.rss',
'kubernetes.network.rx_bytes',
'kubernetes.network.tx_bytes'
'kubernetes.network.tx_bytes',
]

EXPECTED_METRICS_PROMETHEUS = [
'kubernetes.cpu.load.10s.avg',
'kubernetes.cpu.system.total',
'kubernetes.cpu.user.total',
'kubernetes.cpu.cfs.throttled.periods',
'kubernetes.cpu.cfs.throttled.seconds',
'kubernetes.memory.usage_pct',
'kubernetes.network.rx_dropped',
'kubernetes.network.rx_errors',
Expand Down Expand Up @@ -261,7 +266,7 @@ def test_prometheus_filtering(monkeypatch, aggregator):
mock_method.assert_called_once()
metric = mock_method.call_args[0][0]
assert len(metric.samples) == 12
for name, labels, value in metric.samples:
for name, labels, _ in metric.samples:
assert name == "container_cpu_usage_seconds_total"
assert labels["pod_name"] != ""

Expand Down