diff --git a/kubelet/datadog_checks/kubelet/cadvisor.py b/kubelet/datadog_checks/kubelet/cadvisor.py index a4145529b3c88..0273d93663017 100644 --- a/kubelet/datadog_checks/kubelet/cadvisor.py +++ b/kubelet/datadog_checks/kubelet/cadvisor.py @@ -26,6 +26,8 @@ 'cpu.*.total'] DEFAULT_ENABLED_GAUGES = [ 'memory.usage', + 'memory.working_set', + 'memory.rss', 'filesystem.usage'] DEFAULT_POD_LEVEL_METRICS = [ 'network.*'] diff --git a/kubelet/datadog_checks/kubelet/prometheus.py b/kubelet/datadog_checks/kubelet/prometheus.py index 7e3000ecd70c1..c485382ce56f8 100644 --- a/kubelet/datadog_checks/kubelet/prometheus.py +++ b/kubelet/datadog_checks/kubelet/prometheus.py @@ -43,6 +43,8 @@ def __init__(self, *args, **kwargs): 'container_fs_usage_bytes': self.container_fs_usage_bytes, 'container_fs_limit_bytes': self.container_fs_limit_bytes, 'container_memory_usage_bytes': self.container_memory_usage_bytes, + 'container_memory_working_set_bytes': self.container_memory_working_set_bytes, + 'container_memory_rss': self.container_memory_rss, 'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes } @@ -255,9 +257,9 @@ def _sum_values_by_context(metric, uid_from_labels): return seen - def _process_container_rate(self, metric_name, metric, scraper_config): + def _process_container_metric(self, type, metric_name, metric, scraper_config): """ - Takes a simple metric about a container, reports it as a rate. + Takes a simple metric about a container, reports it as a rate or gauge. If several series are found for a given container, values are summed before submission. """ if metric.type not in METRIC_TYPES: @@ -283,7 +285,10 @@ def _process_container_rate(self, metric_name, metric, scraper_config): val = sample[self.SAMPLE_VALUE] - self.rate(metric_name, val, tags) + if "rate" == type: + self.rate(metric_name, val, tags) + elif "gauge" == type: + self.gauge(metric_name, val, tags) def _process_pod_rate(self, metric_name, metric, scraper_config): """ @@ -380,15 +385,15 @@ def container_cpu_usage_seconds_total(self, metric, scraper_config): metric.samples[i] = (sample[self.SAMPLE_NAME], sample[self.SAMPLE_LABELS], sample[self.SAMPLE_VALUE] * 10. ** 9) - self._process_container_rate(metric_name, metric, scraper_config) + self._process_container_metric('rate', metric_name, metric, scraper_config) def container_fs_reads_bytes_total(self, metric, scraper_config): metric_name = scraper_config['namespace'] + '.io.read_bytes' - self._process_container_rate(metric_name, metric, scraper_config) + self._process_container_metric('rate', metric_name, metric, scraper_config) def container_fs_writes_bytes_total(self, metric, scraper_config): metric_name = scraper_config['namespace'] + '.io.write_bytes' - self._process_container_rate(metric_name, metric, scraper_config) + self._process_container_metric('rate', metric_name, metric, scraper_config) def container_network_receive_bytes_total(self, metric, scraper_config): metric_name = scraper_config['namespace'] + '.network.rx_bytes' @@ -436,13 +441,20 @@ def container_fs_limit_bytes(self, metric, scraper_config): self._process_limit_metric('', metric, self.fs_usage_bytes, scraper_config, pct_m_name) def container_memory_usage_bytes(self, metric, scraper_config): - """TODO: add swap, cache, failcnt and rss""" metric_name = scraper_config['namespace'] + '.memory.usage' if metric.type not in METRIC_TYPES: self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name)) return self._process_usage_metric(metric_name, metric, self.mem_usage_bytes, scraper_config) + def container_memory_working_set_bytes(self, metric, scraper_config): + metric_name = scraper_config['namespace'] + '.memory.working_set' + self._process_container_metric('gauge', metric_name, metric, scraper_config) + + def container_memory_rss(self, metric, scraper_config): + metric_name = scraper_config['namespace'] + '.memory.rss' + self._process_container_metric('gauge', metric_name, metric, scraper_config) + def container_spec_memory_limit_bytes(self, metric, scraper_config): metric_name = scraper_config['namespace'] + '.memory.limits' pct_m_name = scraper_config['namespace'] + '.memory.usage_pct' diff --git a/kubelet/metadata.csv b/kubelet/metadata.csv index ea8319d4f8faa..fb931510677bd 100644 --- a/kubelet/metadata.csv +++ b/kubelet/metadata.csv @@ -10,7 +10,9 @@ kubernetes.io.write_bytes,gauge,,byte,,The amount of bytes written to the disk,0 kubernetes.memory.capacity,gauge,,byte,,The amount of memory (in bytes) in this machine,0,kubelet,k8s.mem.capacity kubernetes.memory.limits,gauge,,byte,,The limit of memory set,0,kubelet,k8s.mem.limits kubernetes.memory.requests,gauge,,byte,,The requested memory,0,kubelet,k8s.mem.requests -kubernetes.memory.usage,gauge,,byte,,The amount of memory used,-1,kubelet,k8s.mem +kubernetes.memory.usage,gauge,,byte,,Current memory usage in bytes including all memory regardless of when it was accessed,-1,kubelet,k8s.mem +kubernetes.memory.working_set,gauge,,byte,,Current working set in bytes - this is what the OOM killer is watching for,-1,kubelet,k8s.mem.ws +kubernetes.memory.rss,gauge,,byte,,Size of RSS in bytes,-1,kubelet,k8s.mem.rss kubernetes.memory.usage_pct,gauge,,fraction,,The percentage of memory used,-1,kubelet,k8s.mem.used_pct kubernetes.network.rx_bytes,gauge,,byte,second,The amount of bytes per second received,0,kubelet,k8s.net.rx kubernetes.network.rx_dropped,gauge,,packet,second,The amount of rx packets dropped per second,-1,kubelet,k8s.net.rx.drop diff --git a/kubelet/tests/test_kubelet.py b/kubelet/tests/test_kubelet.py index d28900b3f769b..9ca59ade20039 100644 --- a/kubelet/tests/test_kubelet.py +++ b/kubelet/tests/test_kubelet.py @@ -47,6 +47,8 @@ 'kubernetes.memory.limits', 'kubernetes.memory.requests', 'kubernetes.memory.usage', + 'kubernetes.memory.working_set', + 'kubernetes.memory.rss', 'kubernetes.network.rx_bytes', 'kubernetes.network.tx_bytes' ]