Add kubelet rss and working set memory metrics
mfpierre committed Oct 12, 2018
1 parent 46a1448 commit 7247db5
Showing 4 changed files with 38 additions and 13 deletions.
2 changes: 2 additions & 0 deletions kubelet/datadog_checks/kubelet/cadvisor.py
@@ -26,6 +26,8 @@
'cpu.*.total']
DEFAULT_ENABLED_GAUGES = [
'memory.usage',
'memory.working_set',
'memory.rss',
'filesystem.usage']
DEFAULT_POD_LEVEL_METRICS = [
'network.*']
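For context on the cadvisor code path: the check only submits gauges whose names match an entry in the enabled-gauge list, so adding memory.working_set and memory.rss here is what turns the two new series on by default for that collector. The sketch below illustrates the filtering under the assumption of fnmatch-style matching against namespaced metric names; is_enabled_gauge is a hypothetical helper, not the check's actual function.

from fnmatch import fnmatch

NAMESPACE = 'kubernetes'
DEFAULT_ENABLED_GAUGES = [
    'memory.usage',
    'memory.working_set',
    'memory.rss',
    'filesystem.usage',
]

def is_enabled_gauge(metric_name, enabled=DEFAULT_ENABLED_GAUGES):
    # hypothetical helper: build the namespaced pattern list once and match
    # incoming cadvisor metric names against it
    patterns = ['{}.{}'.format(NAMESPACE, p) for p in enabled]
    return any(fnmatch(metric_name, p) for p in patterns)

print(is_enabled_gauge('kubernetes.memory.working_set'))  # True after this change
print(is_enabled_gauge('kubernetes.memory.rss'))          # True after this change
print(is_enabled_gauge('kubernetes.memory.swap'))         # False, not enabled by default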
43 changes: 31 additions & 12 deletions kubelet/datadog_checks/kubelet/prometheus.py
@@ -43,6 +43,8 @@ def __init__(self, *args, **kwargs):
'container_fs_usage_bytes': self.container_fs_usage_bytes,
'container_fs_limit_bytes': self.container_fs_limit_bytes,
'container_memory_usage_bytes': self.container_memory_usage_bytes,
'container_memory_working_set_bytes': self.container_memory_working_set_bytes,
'container_memory_rss': self.container_memory_rss,
'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes
}

@@ -303,14 +305,15 @@ def _process_pod_rate(self, metric_name, metric, scraper_config):
val = sample[self.SAMPLE_VALUE]
self.rate(metric_name, val, tags)

def _process_usage_metric(self, m_name, metric, cache, scraper_config):
def _process_usage_metric(self, m_name, metric, cache, populate_cache, scraper_config):
"""
Takes a metric object, a metric name, and a cache dict where it will store
container_name --> (value, tags) so that _process_limit_metric can compute usage_pct.
It also submits said value and tags as a gauge.
"""
# track containers that still exist in the cache
seen_keys = {k: False for k in cache}
if populate_cache:
# track containers that still exist in the cache
seen_keys = {k: False for k in cache}

samples = self._sum_values_by_context(metric, self._get_container_id_if_container_metric)
for c_id, sample in samples.iteritems():
@@ -333,14 +336,17 @@ def _process_usage_metric(self, m_name, metric, cache, scraper_config):
tags = list(set(tags))

val = sample[self.SAMPLE_VALUE]
cache[c_name] = (val, tags)
seen_keys[c_name] = True
if populate_cache:
cache[c_name] = (val, tags)
seen_keys[c_name] = True

self.gauge(m_name, val, tags)

# purge the cache
for k, seen in seen_keys.iteritems():
if not seen:
del cache[k]
if populate_cache:
# purge the cache
for k, seen in seen_keys.iteritems():
if not seen:
del cache[k]

def _process_limit_metric(self, m_name, metric, cache, scraper_config, pct_m_name=None):
"""
@@ -422,7 +428,7 @@ def container_fs_usage_bytes(self, metric, scraper_config):
if metric.type not in METRIC_TYPES:
self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name))
return
self._process_usage_metric(metric_name, metric, self.fs_usage_bytes, scraper_config)
self._process_usage_metric(metric_name, metric, self.fs_usage_bytes, True, scraper_config)

def container_fs_limit_bytes(self, metric, scraper_config):
"""
@@ -436,12 +442,25 @@ def container_fs_limit_bytes(self, metric, scraper_config):
self._process_limit_metric('', metric, self.fs_usage_bytes, scraper_config, pct_m_name)

def container_memory_usage_bytes(self, metric, scraper_config):
"""TODO: add swap, cache, failcnt and rss"""
metric_name = scraper_config['namespace'] + '.memory.usage'
if metric.type not in METRIC_TYPES:
self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name))
return
self._process_usage_metric(metric_name, metric, self.mem_usage_bytes, scraper_config)
self._process_usage_metric(metric_name, metric, self.mem_usage_bytes, True, scraper_config)

def container_memory_working_set_bytes(self, metric, scraper_config):
metric_name = scraper_config['namespace'] + '.memory.working_set'
if metric.type not in METRIC_TYPES:
self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name))
return
self._process_usage_metric(metric_name, metric, {}, False, scraper_config)

def container_memory_rss(self, metric, scraper_config):
metric_name = scraper_config['namespace'] + '.memory.rss'
if metric.type not in METRIC_TYPES:
self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name))
return
self._process_usage_metric(metric_name, metric, {}, False, scraper_config)

def container_spec_memory_limit_bytes(self, metric, scraper_config):
metric_name = scraper_config['namespace'] + '.memory.limits'
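To make the new populate_cache flag easier to follow: memory.usage keeps writing into self.mem_usage_bytes so that container_spec_memory_limit_bytes can later derive memory.usage_pct, while the new working set and RSS handlers pass an empty dict and populate_cache=False, so they only submit gauges and never touch that cache. Below is a minimal, self-contained sketch of that behaviour; process_usage_metric, submit and the sample values are illustrative stand-ins, not the check's actual code.

def process_usage_metric(m_name, samples, cache, populate_cache, submit_gauge):
    # samples: {container_name: (value, tags)}, already summed by context
    if populate_cache:
        # track containers that still exist in the cache
        seen_keys = {k: False for k in cache}

    for c_name, (val, tags) in samples.items():
        if populate_cache:
            cache[c_name] = (val, tags)
            seen_keys[c_name] = True
        submit_gauge(m_name, val, tags)

    if populate_cache:
        # purge containers that disappeared since the last run
        for k, seen in seen_keys.items():
            if not seen:
                del cache[k]

mem_usage_cache = {}
emitted = []
submit = lambda name, val, tags: emitted.append((name, val, tags))

# memory.usage still feeds the cache used later for memory.usage_pct
process_usage_metric('kubernetes.memory.usage',
                     {'nginx': (52428800, ['pod_name:web-0'])},
                     mem_usage_cache, True, submit)

# memory.working_set and memory.rss only emit gauges: empty cache, populate_cache=False
process_usage_metric('kubernetes.memory.working_set',
                     {'nginx': (41943040, ['pod_name:web-0'])},
                     {}, False, submit)

print(sorted(mem_usage_cache))  # ['nginx'] -- only memory.usage populated the cache
print(len(emitted))             # 2 gauges submitted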
4 changes: 3 additions & 1 deletion kubelet/metadata.csv
@@ -10,7 +10,9 @@ kubernetes.io.write_bytes,gauge,,byte,,The amount of bytes written to the disk,0
kubernetes.memory.capacity,gauge,,byte,,The amount of memory (in bytes) in this machine,0,kubelet,k8s.mem.capacity
kubernetes.memory.limits,gauge,,byte,,The limit of memory set,0,kubelet,k8s.mem.limits
kubernetes.memory.requests,gauge,,byte,,The requested memory,0,kubelet,k8s.mem.requests
kubernetes.memory.usage,gauge,,byte,,The amount of memory used,-1,kubelet,k8s.mem
kubernetes.memory.usage,gauge,,byte,,Current memory usage in bytes including all memory regardless of when it was accessed,-1,kubelet,k8s.mem
kubernetes.memory.working_set,gauge,,byte,,Current working set in bytes - this is what the OOM killer is watching for,-1,kubelet,k8s.mem.ws
kubernetes.memory.rss,gauge,,byte,,Size of RSS in bytes,-1,kubelet,k8s.mem.rss
kubernetes.memory.usage_pct,gauge,,fraction,,The percentage of memory used,-1,kubelet,k8s.mem.used_pct
kubernetes.network.rx_bytes,gauge,,byte,second,The amount of bytes per second received,0,kubelet,k8s.net.rx
kubernetes.network.rx_dropped,gauge,,packet,second,The amount of rx packets dropped per second,-1,kubelet,k8s.net.rx.drop
2 changes: 2 additions & 0 deletions kubelet/tests/test_kubelet.py
@@ -47,6 +47,8 @@
'kubernetes.memory.limits',
'kubernetes.memory.requests',
'kubernetes.memory.usage',
'kubernetes.memory.working_set',
'kubernetes.memory.rss',
'kubernetes.network.rx_bytes',
'kubernetes.network.tx_bytes'
]
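The test change only extends the expected-metric list, so a fixture-driven run of the check should now also report the two new series. The following is a self-contained sketch of the assertion pattern; FakeAggregator and the hard-coded gauge values are stand-ins for the real aggregator stub and the mocked kubelet run used in test_kubelet.py.

NEW_MEMORY_METRICS = [
    'kubernetes.memory.working_set',
    'kubernetes.memory.rss',
]

class FakeAggregator(object):
    """Stand-in for the aggregator stub provided by the test helpers."""

    def __init__(self):
        self.metrics = {}

    def gauge(self, name, value, tags=None):
        self.metrics.setdefault(name, []).append((value, tags or []))

    def assert_metric(self, name):
        assert name in self.metrics, 'metric %s was never submitted' % name

aggregator = FakeAggregator()
# pretend the check ran against the kubelet fixtures and submitted these gauges
aggregator.gauge('kubernetes.memory.working_set', 41943040, ['pod_name:web-0'])
aggregator.gauge('kubernetes.memory.rss', 39845888, ['pod_name:web-0'])

for metric in NEW_MEMORY_METRICS:
    aggregator.assert_metric(metric)
print('all new memory metrics asserted')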