Skip to content

Commit

Permalink
[kubelet] add more cpu metrics (#2595)
Browse files Browse the repository at this point in the history
* Add more cpu metrics

* add metadata

* remove cfs periods, switch some CPU metrics to gauge

* add unit to kubelet cpu throttled metric

* Fix metadata, switch some metrics to rate
  • Loading branch information
hkaj committed Nov 22, 2018
1 parent 61f8833 commit 5371ef9
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 9 deletions.
32 changes: 25 additions & 7 deletions kubelet/datadog_checks/kubelet/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ def __init__(self, *args, **kwargs):

self.CADVISOR_METRIC_TRANSFORMERS = {
'container_cpu_usage_seconds_total': self.container_cpu_usage_seconds_total,
'container_cpu_load_average_10s': self.container_cpu_load_average_10s,
'container_cpu_system_seconds_total': self.container_cpu_system_seconds_total,
'container_cpu_user_seconds_total': self.container_cpu_user_seconds_total,
'container_cpu_cfs_throttled_periods_total': self.container_cpu_cfs_throttled_periods_total,
'container_cpu_cfs_throttled_seconds_total': self.container_cpu_cfs_throttled_seconds_total,
'container_fs_reads_bytes_total': self.container_fs_reads_bytes_total,
'container_fs_writes_bytes_total': self.container_fs_writes_bytes_total,
'container_network_receive_bytes_total': self.container_network_receive_bytes_total,
Expand All @@ -45,7 +50,7 @@ def __init__(self, *args, **kwargs):
'container_memory_usage_bytes': self.container_memory_usage_bytes,
'container_memory_working_set_bytes': self.container_memory_working_set_bytes,
'container_memory_rss': self.container_memory_rss,
'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes
'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes,
}

def _create_cadvisor_prometheus_instance(self, instance):
Expand All @@ -63,11 +68,6 @@ def _create_cadvisor_prometheus_instance(self, instance):
'prometheus_url': instance.get('cadvisor_metrics_endpoint', 'dummy_url/cadvisor'),
'ignore_metrics': [
'container_cpu_cfs_periods_total',
'container_cpu_cfs_throttled_periods_total',
'container_cpu_cfs_throttled_seconds_total',
'container_cpu_load_average_10s',
'container_cpu_system_seconds_total',
'container_cpu_user_seconds_total',
'container_fs_inodes_free',
'container_fs_inodes_total',
'container_fs_io_current',
Expand Down Expand Up @@ -254,7 +254,6 @@ def _sum_values_by_context(metric, uid_from_labels):
)
# TODO
# metric.Clear() # Ignore this metric message

return seen

def _process_container_metric(self, type, metric_name, metric, scraper_config):
Expand Down Expand Up @@ -384,7 +383,26 @@ def container_cpu_usage_seconds_total(self, metric, scraper_config):
# Replacing the sample tuple to convert cores in nano cores
metric.samples[i] = (sample[self.SAMPLE_NAME], sample[self.SAMPLE_LABELS],
sample[self.SAMPLE_VALUE] * 10. ** 9)
self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_cpu_load_average_10s(self, metric, scraper_config):
    """Forward the cAdvisor ``container_cpu_load_average_10s`` metric as a gauge.

    The submitted name is ``<namespace>.cpu.load.10s.avg``, where the
    namespace is read from ``scraper_config['namespace']``.
    """
    namespace = scraper_config['namespace']
    self._process_container_metric('gauge', namespace + '.cpu.load.10s.avg', metric, scraper_config)

def container_cpu_system_seconds_total(self, metric, scraper_config):
    """Forward the cAdvisor ``container_cpu_system_seconds_total`` metric as a rate.

    The submitted name is ``<namespace>.cpu.system.total``, where the
    namespace is read from ``scraper_config['namespace']``.
    """
    namespace = scraper_config['namespace']
    self._process_container_metric('rate', namespace + '.cpu.system.total', metric, scraper_config)

def container_cpu_user_seconds_total(self, metric, scraper_config):
    """Forward the cAdvisor ``container_cpu_user_seconds_total`` metric as a rate.

    The submitted name is ``<namespace>.cpu.user.total``, where the
    namespace is read from ``scraper_config['namespace']``.
    """
    namespace = scraper_config['namespace']
    self._process_container_metric('rate', namespace + '.cpu.user.total', metric, scraper_config)

def container_cpu_cfs_throttled_periods_total(self, metric, scraper_config):
    """Forward the cAdvisor ``container_cpu_cfs_throttled_periods_total`` metric as a rate.

    The submitted name is ``<namespace>.cpu.cfs.throttled.periods``, where
    the namespace is read from ``scraper_config['namespace']``.
    """
    namespace = scraper_config['namespace']
    self._process_container_metric('rate', namespace + '.cpu.cfs.throttled.periods', metric, scraper_config)

def container_cpu_cfs_throttled_seconds_total(self, metric, scraper_config):
    """Forward the cAdvisor ``container_cpu_cfs_throttled_seconds_total`` metric as a rate.

    The submitted name is ``<namespace>.cpu.cfs.throttled.seconds``, where
    the namespace is read from ``scraper_config['namespace']``.
    """
    namespace = scraper_config['namespace']
    self._process_container_metric('rate', namespace + '.cpu.cfs.throttled.seconds', metric, scraper_config)

def container_fs_reads_bytes_total(self, metric, scraper_config):
Expand Down
5 changes: 5 additions & 0 deletions kubelet/metadata.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name
kubernetes.cpu.load.10s.avg,gauge,,,,Container cpu load average over the last 10 seconds,0,kubelet,k8s.cpu.load.10s
kubernetes.cpu.system.total,rate,,core,,The number of cores used for system time,0,kubelet,k8s.cpu.system
kubernetes.cpu.user.total,rate,,core,,The number of cores used for user time,0,kubelet,k8s.cpu.user
kubernetes.cpu.cfs.throttled.periods,rate,,,,Number of throttled period intervals,-1,kubelet,k8s.cpu.throttled.periods
kubernetes.cpu.cfs.throttled.seconds,rate,,,,Total time duration the container has been throttled,-1,kubelet,k8s.cpu.throttled.sec
kubernetes.cpu.capacity,gauge,,core,,The number of cores in this machine,0,kubelet,k8s.cpu.capacity
kubernetes.cpu.usage.total,gauge,,nanocore,,The number of cores used,-1,kubelet,k8s.cpu
kubernetes.cpu.limits,gauge,,core,,The limit of cpu cores set,0,kubelet,k8s.cpu.limits
Expand Down
9 changes: 7 additions & 2 deletions kubelet/tests/test_kubelet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,15 @@
'kubernetes.memory.working_set',
'kubernetes.memory.rss',
'kubernetes.network.rx_bytes',
'kubernetes.network.tx_bytes'
'kubernetes.network.tx_bytes',
]

EXPECTED_METRICS_PROMETHEUS = [
'kubernetes.cpu.load.10s.avg',
'kubernetes.cpu.system.total',
'kubernetes.cpu.user.total',
'kubernetes.cpu.cfs.throttled.periods',
'kubernetes.cpu.cfs.throttled.seconds',
'kubernetes.memory.usage_pct',
'kubernetes.network.rx_dropped',
'kubernetes.network.rx_errors',
Expand Down Expand Up @@ -267,7 +272,7 @@ def test_prometheus_filtering(monkeypatch, aggregator):
mock_method.assert_called_once()
metric = mock_method.call_args[0][0]
assert len(metric.samples) == 12
for name, labels, value in metric.samples:
for name, labels, _ in metric.samples:
assert name == "container_cpu_usage_seconds_total"
assert labels["pod_name"] != ""

Expand Down

0 comments on commit 5371ef9

Please sign in to comment.