Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add kubelet rss and working set memory metrics #2390

Merged
merged 1 commit into from
Oct 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions kubelet/datadog_checks/kubelet/cadvisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
'cpu.*.total']
# Gauge metric names enabled by default (how this list is consumed is not
# visible here -- confirm against the check's configuration handling).
DEFAULT_ENABLED_GAUGES = [
    'memory.usage',
    'memory.working_set',
    'memory.rss',
    'filesystem.usage',
]
# Metric name patterns presumably reported at pod level rather than per
# container -- verify against the code that reads this constant.
DEFAULT_POD_LEVEL_METRICS = ['network.*']
Expand Down
26 changes: 19 additions & 7 deletions kubelet/datadog_checks/kubelet/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def __init__(self, *args, **kwargs):
'container_fs_usage_bytes': self.container_fs_usage_bytes,
'container_fs_limit_bytes': self.container_fs_limit_bytes,
'container_memory_usage_bytes': self.container_memory_usage_bytes,
'container_memory_working_set_bytes': self.container_memory_working_set_bytes,
'container_memory_rss': self.container_memory_rss,
'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes
}

Expand Down Expand Up @@ -255,9 +257,9 @@ def _sum_values_by_context(metric, uid_from_labels):

return seen

def _process_container_rate(self, metric_name, metric, scraper_config):
def _process_container_metric(self, type, metric_name, metric, scraper_config):
"""
Takes a simple metric about a container, reports it as a rate.
Takes a simple metric about a container, reports it as a rate or gauge.
If several series are found for a given container, values are summed before submission.
"""
if metric.type not in METRIC_TYPES:
Expand All @@ -283,7 +285,10 @@ def _process_container_rate(self, metric_name, metric, scraper_config):

val = sample[self.SAMPLE_VALUE]

self.rate(metric_name, val, tags)
if "rate" == type:
self.rate(metric_name, val, tags)
elif "gauge" == type:
self.gauge(metric_name, val, tags)

def _process_pod_rate(self, metric_name, metric, scraper_config):
"""
Expand Down Expand Up @@ -380,15 +385,15 @@ def container_cpu_usage_seconds_total(self, metric, scraper_config):
metric.samples[i] = (sample[self.SAMPLE_NAME], sample[self.SAMPLE_LABELS],
sample[self.SAMPLE_VALUE] * 10. ** 9)

self._process_container_rate(metric_name, metric, scraper_config)
self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_fs_reads_bytes_total(self, metric, scraper_config):
    """
    Report the container filesystem read-bytes counter as a rate named
    `<namespace>.io.read_bytes`.

    :param metric: scraped metric, forwarded unchanged to `_process_container_metric`
    :param scraper_config: scraper configuration; its 'namespace' entry prefixes the metric name
    """
    metric_name = scraper_config['namespace'] + '.io.read_bytes'
    # Submit exactly once through the generic helper; the call to the
    # pre-rename `_process_container_rate` helper is dropped.
    self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_fs_writes_bytes_total(self, metric, scraper_config):
    """
    Report the container filesystem written-bytes counter as a rate named
    `<namespace>.io.write_bytes`.

    :param metric: scraped metric, forwarded unchanged to `_process_container_metric`
    :param scraper_config: scraper configuration; its 'namespace' entry prefixes the metric name
    """
    metric_name = scraper_config['namespace'] + '.io.write_bytes'
    # Submit exactly once through the generic helper; the call to the
    # pre-rename `_process_container_rate` helper is dropped.
    self._process_container_metric('rate', metric_name, metric, scraper_config)

def container_network_receive_bytes_total(self, metric, scraper_config):
metric_name = scraper_config['namespace'] + '.network.rx_bytes'
Expand Down Expand Up @@ -436,13 +441,20 @@ def container_fs_limit_bytes(self, metric, scraper_config):
self._process_limit_metric('', metric, self.fs_usage_bytes, scraper_config, pct_m_name)

def container_memory_usage_bytes(self, metric, scraper_config):
    """
    Handle the `container_memory_usage_bytes` metric, submitted under
    `<namespace>.memory.usage`.

    Metrics whose type is not in METRIC_TYPES are logged as an error and
    skipped. Otherwise the metric is handed to `_process_usage_metric`
    together with `self.mem_usage_bytes`.

    :param metric: scraped metric; `metric.type` and `metric.name` are read here
    :param scraper_config: scraper configuration; its 'namespace' entry prefixes the metric name
    """
    # TODO: add swap, cache and failcnt
    metric_name = scraper_config['namespace'] + '.memory.usage'
    if metric.type not in METRIC_TYPES:
        # Pass the values as logger arguments so formatting is deferred
        # until the record is actually emitted.
        self.log.error("Metric type %s unsupported for metric %s", metric.type, metric.name)
        return
    self._process_usage_metric(metric_name, metric, self.mem_usage_bytes, scraper_config)

def container_memory_working_set_bytes(self, metric, scraper_config):
    """
    Submit the container working-set memory metric as a gauge named
    `<namespace>.memory.working_set`, via `_process_container_metric`.
    """
    gauge_name = scraper_config['namespace'] + '.memory.working_set'
    self._process_container_metric('gauge', gauge_name, metric, scraper_config)

def container_memory_rss(self, metric, scraper_config):
    """
    Submit the container RSS memory metric as a gauge named
    `<namespace>.memory.rss`, via `_process_container_metric`.
    """
    gauge_name = scraper_config['namespace'] + '.memory.rss'
    self._process_container_metric('gauge', gauge_name, metric, scraper_config)

def container_spec_memory_limit_bytes(self, metric, scraper_config):
metric_name = scraper_config['namespace'] + '.memory.limits'
pct_m_name = scraper_config['namespace'] + '.memory.usage_pct'
Expand Down
4 changes: 3 additions & 1 deletion kubelet/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ kubernetes.io.write_bytes,gauge,,byte,,The amount of bytes written to the disk,0
kubernetes.memory.capacity,gauge,,byte,,The amount of memory (in bytes) in this machine,0,kubelet,k8s.mem.capacity
kubernetes.memory.limits,gauge,,byte,,The limit of memory set,0,kubelet,k8s.mem.limits
kubernetes.memory.requests,gauge,,byte,,The requested memory,0,kubelet,k8s.mem.requests
kubernetes.memory.usage,gauge,,byte,,The amount of memory used,-1,kubelet,k8s.mem
kubernetes.memory.usage,gauge,,byte,,Current memory usage in bytes including all memory regardless of when it was accessed,-1,kubelet,k8s.mem
kubernetes.memory.working_set,gauge,,byte,,Current working set in bytes - this is what the OOM killer is watching for,-1,kubelet,k8s.mem.ws
kubernetes.memory.rss,gauge,,byte,,Size of RSS in bytes,-1,kubelet,k8s.mem.rss
kubernetes.memory.usage_pct,gauge,,fraction,,The percentage of memory used,-1,kubelet,k8s.mem.used_pct
kubernetes.network.rx_bytes,gauge,,byte,second,The amount of bytes per second received,0,kubelet,k8s.net.rx
kubernetes.network.rx_dropped,gauge,,packet,second,The amount of rx packets dropped per second,-1,kubelet,k8s.net.rx.drop
Expand Down
2 changes: 2 additions & 0 deletions kubelet/tests/test_kubelet.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
'kubernetes.memory.limits',
'kubernetes.memory.requests',
'kubernetes.memory.usage',
'kubernetes.memory.working_set',
'kubernetes.memory.rss',
'kubernetes.network.rx_bytes',
'kubernetes.network.tx_bytes'
]
Expand Down