From 184032d6c23652914db5d3599c9b656a36d28e61 Mon Sep 17 00:00:00 2001 From: Steve Simpson Date: Thu, 12 May 2022 12:59:13 +0200 Subject: [PATCH] Mixin: Show ingestion rate limit and rule group limit on Tenants dashboard. Whilst diagnosing a recent issue, we thought it would be useful to show the current ingestion rate limit for the tenant. As the limit is applied to `cortex_distributor_received_samples_total`, the limit is shown on the panel which displays this metric. ("Distributor samples received (accepted) rate"). Also added `ruler_max_rule_groups_per_tenant` while in the area. We don't currently display the number of exemplars in storage on the dashboard anywhere, so cannot add `max_global_exemplars_per_user` right now. --- CHANGELOG.md | 1 + .../dashboards/mimir-tenants.json | 50 +++++++++++++-- .../mimir-mixin/dashboards/tenants.libsonnet | 61 +++++++++++-------- 3 files changed, 84 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36db200175c..6cc0772ccf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -115,6 +115,7 @@ * [ENHANCEMENT] Dashboards: Allow custom datasources. #1749 * [ENHANCEMENT] Dashboards: Add config option `gateway_enabled` (defaults to `true`) to disable gateway panels from dashboards. #1761 * [ENHANCEMENT] Dashboards: Extend Top tenants dashboard with queries for tenants with highest sample rate, discard rate, and discard rate growth. #1842 +* [ENHANCEMENT] Dashboards: Show ingestion rate limit and rule group limit on Tenants dashboard. #1845 * [BUGFIX] Dashboards: Fix "Failed evaluation rate" panel on Tenants dashboard. #1629 * [BUGFIX] Honor the configured `per_instance_label` in all dashboards and alerts. #1697 diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-tenants.json b/operations/mimir-mixin-compiled/dashboards/mimir-tenants.json index 1c01242dc79..e1ebb0f47d3 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-tenants.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-tenants.json @@ -470,7 +470,13 @@ "fill": 1, "id": 7, "legend": { - "show": false + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, "lines": true, "linewidth": 1, @@ -480,7 +486,13 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "limit", + "dashes": true, + "fill": 0 + } + ], "spaceLength": 10, "span": 3, "stack": false, @@ -494,6 +506,15 @@ "legendFormat": "rate", "legendLink": null, "step": 10 + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/(overrides-exporter)\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/(overrides-exporter)\", limit_name=\"ingestion_rate\"})\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 } ], "thresholds": [ ], @@ -1202,7 +1223,13 @@ "fill": 1, "id": 16, "legend": { - "show": false + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, "lines": true, "linewidth": 1, @@ -1212,7 +1239,13 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ ], + "seriesOverrides": [ + { + "alias": "limit", + "dashes": true, + "fill": 0 + } + ], "spaceLength": 10, "span": 3, "stack": false, @@ -1226,6 +1259,15 @@ "legendFormat": "groups", "legendLink": null, "step": 10 + }, + { + "expr": "max(cortex_limits_overrides{cluster=~\"$cluster\", job=~\"($namespace)/(overrides-exporter)\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster=~\"$cluster\", job=~\"($namespace)/(overrides-exporter)\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", + "format": "time_series", + "interval": "15s", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 } ], "thresholds": [ ], diff --git a/operations/mimir-mixin/dashboards/tenants.libsonnet b/operations/mimir-mixin/dashboards/tenants.libsonnet index f05aa38f183..25ae99e988e 100644 --- a/operations/mimir-mixin/dashboards/tenants.libsonnet +++ b/operations/mimir-mixin/dashboards/tenants.libsonnet @@ -2,6 +2,21 @@ local utils = import 'mixin-utils/utils.libsonnet'; local filename = 'mimir-tenants.json'; (import 'dashboard-utils.libsonnet') { + local user_limits_overrides_query(limit_name) = ||| + max(cortex_limits_overrides{%(overrides_exporter)s, limit_name="%(limit_name)s", user="$user"}) + or + max(cortex_limits_defaults{%(overrides_exporter)s, limit_name="%(limit_name)s"}) + ||| % { + overrides_exporter: $.jobMatcher($._config.job_names.overrides_exporter), + limit_name: limit_name, + }, + + local limit_style = { + alias: 'limit', + fill: 0, + dashes: true, + }, + [filename]: ($.dashboard('Tenants') + { uid: std.md5(filename) }) .addClusterSelectorTemplates() @@ -40,13 +55,7 @@ local filename = 'mimir-tenants.json'; distributor: $.jobMatcher($._config.job_names.distributor), group_by_cluster: $._config.group_by_cluster, }, - ||| - max(cortex_limits_overrides{%(overrides_exporter)s, limit_name="max_global_series_per_user", user="$user"}) - or - max(cortex_limits_defaults{%(overrides_exporter)s, limit_name="max_global_series_per_user"}) - ||| % { - overrides_exporter: $.jobMatcher($._config.job_names.overrides_exporter), - }, + user_limits_overrides_query('max_global_series_per_user'), ||| sum( cortex_ingester_active_series{%(ingester)s, user="$user"} @@ -77,15 +86,7 @@ local filename = 'mimir-tenants.json'; 'active ({{ name }})', ], ) + - { - seriesOverrides: [ - { - alias: 'limit', - fill: 0, - dashes: true, - }, - ], - } + + { seriesOverrides: [limit_style] } + $.panelDescription( title, ||| @@ -180,11 +181,17 @@ local filename = 'mimir-tenants.json'; local title = 'Distributor samples received (accepted) rate'; $.panel(title) + $.queryPanel( - 'sum(rate(cortex_distributor_received_samples_total{%(job)s, user="$user"}[$__rate_interval]))' - % { job: $.jobMatcher($._config.job_names.distributor) }, - 'rate', + [ + 'sum(rate(cortex_distributor_received_samples_total{%(job)s, user="$user"}[$__rate_interval]))' + % { job: $.jobMatcher($._config.job_names.distributor) }, + user_limits_overrides_query('ingestion_rate'), + ], + [ + 'rate', + 'limit', + ], ) + - { legend: { show: false } } + + { seriesOverrides: [limit_style] } + $.panelDescription( title, ||| @@ -359,11 +366,17 @@ local filename = 'mimir-tenants.json'; local title = 'Number of groups'; $.panel(title) + $.queryPanel( - 'count(sum by (rule_group) (cortex_prometheus_rule_group_rules{%(job)s, user="$user"}))' - % { job: $.jobMatcher($._config.job_names.ruler) }, - 'groups', + [ + 'count(sum by (rule_group) (cortex_prometheus_rule_group_rules{%(job)s, user="$user"}))' + % { job: $.jobMatcher($._config.job_names.ruler) }, + user_limits_overrides_query('ruler_max_rule_groups_per_tenant'), + ], + [ + 'groups', + 'limit', + ] ) + - { legend: { show: false } } + + { seriesOverrides: [limit_style] } + $.panelDescription( title, |||