Skip to content

Commit

Permalink
Mixin: Additions to Top tenants dashboard regarding sample rate and discard rate.
Browse files Browse the repository at this point in the history

Adds the following rows to the "Top tenants" dashboard:

- By samples rate growth
- By discarded samples rate
- By discarded samples rate growth

These queries are useful for determining which tenants are potentially putting excess
load on distributors and ingesters, and whether that load has increased recently.
  • Loading branch information
stevesg committed May 12, 2022
1 parent 8738cc8 commit eef613d
Show file tree
Hide file tree
Showing 3 changed files with 367 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
* [ENHANCEMENT] Playbooks: Add Alertmanager suggestions for `MimirRequestErrors` and `MimirRequestLatency` #1702
* [ENHANCEMENT] Dashboards: Allow custom datasources. #1749
* [ENHANCEMENT] Dashboards: Add config option `gateway_enabled` (defaults to `true`) to disable gateway panels from dashboards. #1761
* [ENHANCEMENT] Dashboards: Extend Top tenants dashboard with queries for tenants with highest sample rate growth, discard rate, and discard rate growth. #1842
* [BUGFIX] Dashboards: Fix "Failed evaluation rate" panel on Tenants dashboard. #1629
* [BUGFIX] Honor the configured `per_instance_label` in all dashboards and alerts. #1697

Expand Down
312 changes: 309 additions & 3 deletions operations/mimir-mixin-compiled/dashboards/mimir-top-tenants.json
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,312 @@
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir))\"}[$__rate_interval] @ start()))\n)\n",
"format": "time_series",
"interval": "15s",
"intervalFactor": 2,
"legendFormat": "{{ user }}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Top $limit users by received samples rate that grew the most between query range start and query range end",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "By samples rate growth",
"titleSize": "h6"
},
{
"collapse": true,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"sort": {
"col": 2,
"desc": true
},
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "samples/s",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
"thresholds": [ ],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": null,
"colors": [ ],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "/.*/",
"thresholds": [ ],
"type": "string",
"unit": "short"
}
],
"targets": [
{
"expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir)|(distributor|cortex|mimir))\"}[5m])))",
"format": "table",
"instant": true,
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Top $limit users by discarded samples rate in last 5m",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"transform": "table",
"type": "table",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "By discarded samples rate",
"titleSize": "h6"
},
{
"collapse": true,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir)|(distributor|cortex|mimir))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir)|(distributor|cortex|mimir))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir)|(distributor|cortex|mimir))\"}[$__rate_interval] @ start()))\n)\n",
"format": "time_series",
"interval": "15s",
"intervalFactor": 2,
"legendFormat": "{{ user }}",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "By discarded samples rate growth",
"titleSize": "h6"
},
{
"collapse": true,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"sort": {
"col": 2,
"desc": true
Expand Down Expand Up @@ -657,7 +963,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 7,
"id": 10,
"legend": {
"avg": false,
"current": false,
Expand Down Expand Up @@ -785,7 +1091,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 8,
"id": 11,
"legend": {
"avg": false,
"current": false,
Expand Down Expand Up @@ -913,7 +1219,7 @@
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 9,
"id": 12,
"legend": {
"avg": false,
"current": false,
Expand Down
Loading

0 comments on commit eef613d

Please sign in to comment.