From d323579ead8ecfc1bec5644c1ea54e17e8a112f2 Mon Sep 17 00:00:00 2001 From: AliDatadog <125997632+AliDatadog@users.noreply.github.com> Date: Wed, 14 Jun 2023 09:22:08 +0200 Subject: [PATCH] [kube_apiserver_metrics] Rename aggregator_unavailable_apiservice `name` tag to `apiservice_name` (#14738) * handle name tag properly * new metrics transformer for aggregator_unavailable_apiservice which avoids conflicting tag * update dashboard * rename submit_as_gauge_and_monotonic_count (cherry picked from commit d30dd1c1933247f557b47fdfefe025ce1d9b13e9) --- Dockerfile | 7 ++++ .../assets/dashboards/overview.json | 2 +- .../kube_apiserver_metrics.py | 41 ++++++++++++------- .../tests/test_kube_apiserver_metrics_1_19.py | 3 ++ .../tests/test_kube_apiserver_metrics_1_23.py | 2 + .../tests/test_kube_apiserver_metrics_1_24.py | 2 + .../tests/test_kube_apiserver_metrics_1_25.py | 2 + .../tests/test_kube_apiserver_metrics_1_26.py | 2 + 8 files changed, 45 insertions(+), 16 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000..fd2620ec92031 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,7 @@ +FROM datadog/agent:latest +# COPY datadog_checks_base/datadog_checks/base/checks/base.py /opt/datadog-agent/embedded/lib/python3.8/site-packages/datadog_checks/base/checks/base.py +COPY kubelet/datadog_checks/kubelet/kubelet.py /opt/datadog-agent/embedded/lib/python3.8/site-packages/datadog_checks/kubelet/kubelet.py +COPY kubelet/datadog_checks/kubelet/probes.py /opt/datadog-agent/embedded/lib/python3.8/site-packages/datadog_checks/kubelet/probes.py +COPY kubelet/datadog_checks/kubelet/prometheus.py /opt/datadog-agent/embedded/lib/python3.8/site-packages/datadog_checks/kubelet/prometheus.py +COPY kube_apiserver_metrics/datadog_checks/kube_apiserver_metrics/kube_apiserver_metrics.py /opt/datadog-agent/embedded/lib/python3.8/site-packages/datadog_checks/kube_apiserver_metrics/kube_apiserver_metrics.py + diff --git a/kube_apiserver_metrics/assets/dashboards/overview.json b/kube_apiserver_metrics/assets/dashboards/overview.json index 2dfd0fe6970da..7b6fddf67c9dd 100644 --- a/kube_apiserver_metrics/assets/dashboards/overview.json +++ b/kube_apiserver_metrics/assets/dashboards/overview.json @@ -380,7 +380,7 @@ { "data_source": "metrics", "name": "query1", - "query": "max:kube_apiserver.aggregator_unavailable_apiservice{$cluster,$scope} by {kube_namespace,name}", + "query": "max:kube_apiserver.aggregator_unavailable_apiservice{$cluster,$scope} by {kube_namespace, apiservice_name}", "aggregator": "avg" } ], diff --git a/kube_apiserver_metrics/datadog_checks/kube_apiserver_metrics/kube_apiserver_metrics.py b/kube_apiserver_metrics/datadog_checks/kube_apiserver_metrics/kube_apiserver_metrics.py index 9d187a1ee4082..09ab5c3978b0c 100644 --- a/kube_apiserver_metrics/datadog_checks/kube_apiserver_metrics/kube_apiserver_metrics.py +++ b/kube_apiserver_metrics/datadog_checks/kube_apiserver_metrics/kube_apiserver_metrics.py @@ -40,7 +40,6 @@ # fmt: on # For Kubernetes >= 1.14 # (https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.14.md#deprecated-metrics) - 'aggregator_unavailable_apiservice': 'aggregator_unavailable_apiservice', 'rest_client_request_duration_seconds': 'rest_client_request_latency_seconds', 'apiserver_admission_webhook_admission_duration_seconds': 'admission_webhook_admission_latencies_seconds', 'apiserver_admission_step_admission_duration_seconds': 'admission_step_admission_latencies_seconds', @@ -94,6 +93,7 @@ class KubeAPIServerMetricsCheck(OpenMetricsBaseCheck): def __init__(self, name, init_config, instances=None): # Set up metrics_transformers self.metric_transformers = { + 'aggregator_unavailable_apiservice': self.aggregator_unavailable_apiservice, 'apiserver_audit_event_total': self.apiserver_audit_event_total, 'rest_client_requests_total': self.rest_client_requests_total, 'apiserver_request_count': self.apiserver_request_count, @@ -157,7 +157,7 @@ def _create_kube_apiserver_metrics_instance(self, instance): return kube_apiserver_metrics_instance - def submit_as_gauge_and_monotonic_count(self, metric_suffix, metric, scraper_config): + def submit_metric(self, metric_suffix, metric, scraper_config, gauge=True, monotonic_count=True): """ submit a kube_apiserver_metrics metric both as a gauge (for compatibility) and as a monotonic_count """ @@ -168,34 +168,45 @@ def submit_as_gauge_and_monotonic_count(self, metric_suffix, metric, scraper_con for label_name, label_value in iteritems(sample[self.SAMPLE_LABELS]): _tags.append('{}:{}'.format(label_name, label_value)) - # submit raw metric - self.gauge(metric_name, sample[self.SAMPLE_VALUE], _tags) - # submit rate metric - self.monotonic_count(metric_name + '.count', sample[self.SAMPLE_VALUE], _tags) + if gauge: + # submit raw metric + self.gauge(metric_name, sample[self.SAMPLE_VALUE], _tags) + if monotonic_count: + # submit rate metric + self.monotonic_count(metric_name + '.count', sample[self.SAMPLE_VALUE], _tags) + + def aggregator_unavailable_apiservice(self, metric, scraper_config): + """ + This function replaces the tag "name" by "apiservice_name". + It assumes that every sample is tagged with `name`. + """ + for sample in metric.samples: + sample[self.SAMPLE_LABELS]["apiservice_name"] = sample[self.SAMPLE_LABELS].pop("name") + self.submit_metric('.aggregator_unavailable_apiservice', metric, scraper_config, monotonic_count=False) def apiserver_audit_event_total(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.audit_event', metric, scraper_config) + self.submit_metric('.audit_event', metric, scraper_config) def rest_client_requests_total(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.rest_client_requests_total', metric, scraper_config) + self.submit_metric('.rest_client_requests_total', metric, scraper_config) def http_requests_total(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.http_requests_total', metric, scraper_config) + self.submit_metric('.http_requests_total', metric, scraper_config) def apiserver_request_count(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.apiserver_request_count', metric, scraper_config) + self.submit_metric('.apiserver_request_count', metric, scraper_config) def apiserver_dropped_requests_total(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.apiserver_dropped_requests_total', metric, scraper_config) + self.submit_metric('.apiserver_dropped_requests_total', metric, scraper_config) def authenticated_user_requests(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.authenticated_user_requests', metric, scraper_config) + self.submit_metric('.authenticated_user_requests', metric, scraper_config) def apiserver_request_total(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.apiserver_request_total', metric, scraper_config) + self.submit_metric('.apiserver_request_total', metric, scraper_config) def apiserver_request_terminations_total(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.apiserver_request_terminations_total', metric, scraper_config) + self.submit_metric('.apiserver_request_terminations_total', metric, scraper_config) def apiserver_admission_webhook_fail_open_count(self, metric, scraper_config): - self.submit_as_gauge_and_monotonic_count('.apiserver_admission_webhook_fail_open_count', metric, scraper_config) + self.submit_metric('.apiserver_admission_webhook_fail_open_count', metric, scraper_config) diff --git a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_19.py b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_19.py index c6d1f4eaec3c7..997a7df4c9477 100644 --- a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_19.py +++ b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_19.py @@ -62,6 +62,7 @@ class TestKubeAPIServerMetrics: 'aggregator_unavailable_apiservice', 'envelope_encryption_dek_cache_fill_percent', ] + COUNT_METRICS = [ 'audit_event.count', 'rest_client_requests_total.count', @@ -86,4 +87,6 @@ def test_check(self, dd_run_check, aggregator, mock_http_response): metric_to_assert = NAMESPACE + "." + metric aggregator.assert_metric(metric_to_assert) aggregator.assert_metric_has_tag(metric_to_assert, customtag) + if "aggregator_unavailable_apiservice" in metric: + aggregator.assert_metric_has_tag(metric_to_assert, "apiservice_name:v1.") aggregator.assert_all_metrics_covered() diff --git a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_23.py b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_23.py index 7c2822f021a8f..ae0159a5e8eaf 100644 --- a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_23.py +++ b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_23.py @@ -92,4 +92,6 @@ def test_check(self, dd_run_check, aggregator, mock_http_response): metric_to_assert = NAMESPACE + "." + metric aggregator.assert_metric(metric_to_assert) aggregator.assert_metric_has_tag(metric_to_assert, customtag) + if "aggregator_unavailable_apiservice" in metric: + aggregator.assert_metric_has_tag(metric_to_assert, "apiservice_name:v1.") aggregator.assert_all_metrics_covered() diff --git a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_24.py b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_24.py index 8d138226c7938..8c95fd75b9182 100644 --- a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_24.py +++ b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_24.py @@ -94,4 +94,6 @@ def test_check(self, dd_run_check, aggregator, mock_http_response): metric_to_assert = NAMESPACE + "." + metric aggregator.assert_metric(metric_to_assert) aggregator.assert_metric_has_tag(metric_to_assert, customtag) + if "aggregator_unavailable_apiservice" in metric: + aggregator.assert_metric_has_tag(metric_to_assert, "apiservice_name:v1.") aggregator.assert_all_metrics_covered() diff --git a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_25.py b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_25.py index 359524d45e266..c243d42eaa8ea 100644 --- a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_25.py +++ b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_25.py @@ -113,4 +113,6 @@ def test_check(self, dd_run_check, aggregator, mock_get): for metric in self.METRICS + self.COUNT_METRICS: aggregator.assert_metric(metric) aggregator.assert_metric_has_tag(metric, customtag) + if "aggregator_unavailable_apiservice" in metric: + aggregator.assert_metric_has_tag(metric, "apiservice_name:v1.") aggregator.assert_all_metrics_covered() diff --git a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_26.py b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_26.py index e26172e1b61ef..98a26546817eb 100644 --- a/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_26.py +++ b/kube_apiserver_metrics/tests/test_kube_apiserver_metrics_1_26.py @@ -90,4 +90,6 @@ def test_check(self, dd_run_check, aggregator, mock_http_response): metric_to_assert = NAMESPACE + "." + metric aggregator.assert_metric(metric_to_assert) aggregator.assert_metric_has_tag(metric_to_assert, customtag) + if "aggregator_unavailable_apiservice" in metric: + aggregator.assert_metric_has_tag(metric_to_assert, "apiservice_name:v1.") aggregator.assert_all_metrics_covered()