Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[kubelet] Add node filesystem stat from stats/summary #14426

Merged
merged 1 commit into from
Apr 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions kubelet/datadog_checks/kubelet/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class SummaryScraperMixin(object):

def process_stats_summary(self, pod_list_utils, stats, instance_tags, main_stats_source):
# Reports node-wide system metrics (node filesystems and system containers)
self._report_system_container_metrics(stats, instance_tags)
self._report_system_metrics(stats, instance_tags)
# Reports POD & Container metrics. If `main_stats_source` is set, retrieve everything it can
# Otherwise retrieves only what we cannot get elsewhere
self._report_metrics(pod_list_utils, stats, instance_tags, main_stats_source)
Expand Down Expand Up @@ -131,16 +131,20 @@ def _report_container_stats(

# TODO: Review meaning of these metrics as capacity != available + used
# availableBytes = container.get('rootfs', {}).get('availableBytes')
capacity_bytes = container.get('rootfs', {}).get('capacityBytes')
used_bytes = container.get('rootfs', {}).get('usedBytes')
self._report_fs_metrics(container.get('rootfs', {}), self.NAMESPACE, container_tags)

if used_bytes is not None:
self.gauge(self.NAMESPACE + '.filesystem.usage', used_bytes, container_tags)
if used_bytes is not None and capacity_bytes is not None:
self.gauge(self.NAMESPACE + '.filesystem.usage_pct', float(used_bytes) / capacity_bytes, container_tags)
def _report_system_metrics(self, stats, instance_tags):
node_stats = stats.get('node')
if not node_stats:
return

# Node filesystems
self._report_fs_metrics(node_stats.get('fs', {}), self.NAMESPACE + '.node', instance_tags)
self._report_fs_metrics(
node_stats.get("runtime", {}).get("imageFs", {}), self.NAMESPACE + ".node.image", instance_tags
)

def _report_system_container_metrics(self, stats, instance_tags):
sys_containers = stats.get('node', {}).get('systemContainers', [])
sys_containers = node_stats.get('systemContainers', [])
for ctr in sys_containers:
if ctr.get('name') == 'runtime':
mem_rss = ctr.get('memory', {}).get('rssBytes')
Expand All @@ -162,3 +166,13 @@ def _report_system_container_metrics(self, stats, instance_tags):
memory_usage = ctr.get('memory', {}).get('usageBytes')
if memory_usage:
self.gauge(self.NAMESPACE + '.kubelet.memory.usage', memory_usage, instance_tags)

def _report_fs_metrics(self, fs_stats, namespace, tags):
    """Submit filesystem usage gauges built from one filesystem stats mapping.

    Emits ``<namespace>.filesystem.usage`` with the ``usedBytes`` value and,
    when a usable ``capacityBytes`` is also present,
    ``<namespace>.filesystem.usage_pct`` as the ratio used / capacity
    (a fraction, not multiplied by 100).

    :param fs_stats: mapping that may contain ``usedBytes`` and
        ``capacityBytes``; an empty or ``None`` value produces no metric.
    :param namespace: metric name prefix placed before ``.filesystem.*``.
    :param tags: tags passed through unchanged to ``self.gauge``.
    """
    # A JSON null for the filesystem entry arrives here as None (the `{}`
    # default of dict.get only applies when the key is absent).
    if not fs_stats:
        return

    fs_used = fs_stats.get('usedBytes')
    if fs_used is None:
        # Without the used value neither gauge can be computed.
        return

    self.gauge(namespace + '.filesystem.usage', fs_used, tags)

    fs_capacity = fs_stats.get('capacityBytes')
    # Skip the ratio when capacity is missing or zero to avoid dividing by zero.
    if fs_capacity:
        self.gauge(namespace + '.filesystem.usage_pct', float(fs_used) / fs_capacity, tags)
4 changes: 4 additions & 0 deletions kubelet/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,7 @@ kubernetes.liveness_probe.success.total,count,,,,Cumulative number of successful
kubernetes.liveness_probe.failure.total,count,,,,Cumulative number of failed liveness probe for a container (ALPHA in kubernetes v1.15),-1,kubernetes,k8s.liveness_probe.failure.total,
kubernetes.readiness_probe.success.total,count,,,,Cumulative number of successful readiness probe for a container (ALPHA in kubernetes v1.15),-1,kubernetes,k8s.liveness_probe.success.total,
kubernetes.readiness_probe.failure.total,count,,,,Cumulative number of failed readiness probe for a container (ALPHA in kubernetes v1.15),-1,kubernetes,k8s.liveness_probe.failure.total,
kubernetes.node.filesystem.usage,gauge,,byte,,The amount of disk used at node level,-1,kubernetes,k8s.node.disk.usage,
kubernetes.node.filesystem.usage_pct,gauge,,fraction,,The percentage of disk space used at node level,-1,kubernetes,k8s.node.disk.used_pct,
kubernetes.node.image.filesystem.usage,gauge,,byte,,The amount of disk used on image filesystem (node level),-1,kubernetes,k8s.node.image.disk.usage,
kubernetes.node.image.filesystem.usage_pct,gauge,,fraction,,The percentage of disk used on image filesystem (node level),-1,kubernetes,k8s.node.image.disk.used_pct,
13 changes: 13 additions & 0 deletions kubelet/tests/test_kubelet.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
'kubernetes.kubelet.cpu.usage',
'kubernetes.kubelet.memory.usage',
'kubernetes.kubelet.memory.rss',
'kubernetes.node.filesystem.usage',
'kubernetes.node.filesystem.usage_pct',
'kubernetes.node.image.filesystem.usage',
'kubernetes.node.image.filesystem.usage_pct',
]

EXPECTED_METRICS_PROMETHEUS = [
Expand Down Expand Up @@ -178,6 +182,10 @@
'kubernetes.runtime.cpu.usage',
'kubernetes.runtime.memory.usage',
'kubernetes.runtime.memory.rss',
'kubernetes.node.filesystem.usage',
'kubernetes.node.filesystem.usage_pct',
'kubernetes.node.image.filesystem.usage',
'kubernetes.node.image.filesystem.usage_pct',
]

COMMON_TAGS = {
Expand Down Expand Up @@ -893,6 +901,10 @@ def test_no_tags_no_metrics(monkeypatch, aggregator, tagger):
aggregator.assert_metric('kubernetes.rest.client.latency.count')
aggregator.assert_metric('kubernetes.rest.client.latency.sum')
aggregator.assert_metric('kubernetes.rest.client.requests')
aggregator.assert_metric('kubernetes.node.filesystem.usage')
aggregator.assert_metric('kubernetes.node.filesystem.usage_pct')
aggregator.assert_metric('kubernetes.node.image.filesystem.usage')
aggregator.assert_metric('kubernetes.node.image.filesystem.usage_pct')
aggregator.assert_all_metrics_covered()


Expand Down Expand Up @@ -1226,6 +1238,7 @@ def test_process_stats_summary_as_source_filtering_by_namespace(monkeypatch):
monkeypatch.setattr(check, 'rate', mock.Mock())
pod_list_utils = PodListUtils(json.loads(mock_from_file('pods_windows.json')))
stats = json.loads(mock_from_file('stats_summary_windows.json'))
del stats['node']

# Namespace is excluded, so it shouldn't report any metrics
monkeypatch.setattr(pod_list_utils, 'is_namespace_excluded', mock.Mock(return_value=True))
Expand Down