diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml index c749ab1f8a8..4a6ca58984b 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml @@ -20762,7 +20762,7 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": true, "steppedLine": false, "targets": [ @@ -20837,7 +20837,7 @@ data: "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ @@ -20895,14 +20895,26 @@ data: "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Latency (Time in Queue) by Queue Dimension\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", "fill": 1, "id": 12, "legend": { @@ -20933,13 +20945,151 @@ data: "format": "time_series", "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "99th Percentile Latency by Queue Dimension", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "B" + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "50th Percentile Latency by Queue Dimension", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", @@ -20950,7 +21100,7 @@ data: "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Latency (Time in Queue) by Queue Dimension", + "title": "Average Latency by Queue Dimension", "tooltip": { "shared": false, "sort": 0, @@ -20988,7 +21138,7 @@ data: "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query-scheduler", + "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", "titleSize": "h6" }, { @@ -21002,7 +21152,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 13, + "id": 15, "legend": { "avg": false, "current": false, @@ -21076,7 +21226,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 14, + "id": 16, "legend": { "avg": false, "current": false, @@ -21190,7 +21340,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 15, + "id": 17, "legend": { "avg": false, "current": false, @@ -21264,7 +21414,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 16, + "id": 18, "legend": { "avg": false, "current": false, @@ -21370,7 +21520,7 @@ data: }, "overrides": [ ] }, - "id": 17, + "id": 19, "links": [ ], "options": { "legend": { @@ -21424,7 +21574,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 18, + "id": 20, "legend": { "avg": false, "current": false, @@ -21498,7 +21648,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 19, + "id": 21, "legend": { "avg": false, "current": false, @@ -21604,7 +21754,7 @@ data: }, "overrides": [ ] }, - "id": 20, + "id": 22, "links": [ ], "options": { "legend": { @@ -21658,7 +21808,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 21, + "id": 23, "legend": { "avg": false, "current": false, @@ -21732,7 +21882,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 22, + "id": 24, "legend": { "avg": false, "current": false, @@ -21838,7 +21988,7 @@ data: }, "overrides": [ ] }, - "id": 23, + "id": 25, "links": [ ], "options": { "legend": { @@ -21883,7 +22033,7 @@ data: "datasource": "$datasource", "description": "### Replicas\nThe maximum, and current number of querier replicas.\nPlease note that the current number of replicas can still show 1 replica even when scaled to 0.\nSince HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.\n\n", "fill": 1, - "id": 24, + "id": 26, "legend": { "avg": false, "current": false, @@ -21985,7 +22135,7 @@ data: "datasource": "$datasource", "description": "### Scaling metric (desired replicas)\nThis panel shows the result scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints which are applied later.\n\n", "fill": 1, - "id": 25, + "id": 27, "legend": { "avg": false, "current": false, @@ -22065,7 +22215,7 @@ data: "datasource": "$datasource", "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler woudln't work properly.\n\n", "fill": 1, - "id": 26, + "id": 28, "legend": { "avg": false, "current": false, @@ -22161,7 +22311,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 27, + "id": 29, "legend": { "avg": false, "current": false, @@ -22235,7 +22385,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 28, + "id": 30, "legend": { "avg": false, "current": false, @@ -22333,7 +22483,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 29, + "id": 31, "legend": { "avg": false, "current": false, @@ -22407,7 +22557,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 30, + "id": 32, "legend": { "avg": false, "current": false, @@ -22500,7 +22650,7 @@ data: "datasource": "$datasource", "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", "fill": 1, - "id": 31, + "id": 33, "legend": { "avg": false, "current": false, @@ -22586,7 +22736,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 32, + "id": 34, "legend": { "avg": false, "current": false, @@ -22660,7 +22810,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 33, + "id": 35, "legend": { "avg": false, "current": false, @@ -22752,7 +22902,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 34, + "id": 36, "legend": { "avg": false, "current": false, @@ -22838,7 +22988,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 35, + "id": 37, "legend": { "avg": false, "current": false, @@ -22912,7 +23062,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 36, + "id": 38, "legend": { "avg": false, "current": false, @@ -23004,7 +23154,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 37, + "id": 39, "legend": { "avg": false, "current": false, @@ -23090,7 +23240,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 38, + "id": 40, "legend": { "avg": false, "current": false, @@ -23164,7 +23314,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 39, + "id": 41, "legend": { "avg": false, "current": false, @@ -23256,7 +23406,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 40, + "id": 42, "legend": { "avg": false, "current": false, @@ -23342,7 +23492,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 41, + "id": 43, "legend": { "avg": false, "current": false, @@ -23419,7 +23569,7 @@ data: "unit": "percentunit" } }, - "id": 42, + "id": 44, "links": [ ], "options": { "legend": { @@ -23449,7 +23599,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 43, + "id": 45, "legend": { "avg": false, "current": false, @@ -23535,7 +23685,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 44, + "id": 46, "legend": { "avg": false, "current": false, @@ -23633,7 +23783,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 45, + "id": 47, "legend": { "avg": false, "current": false, @@ -23719,7 +23869,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 46, + "id": 48, "legend": { "avg": false, "current": false, @@ -23805,7 +23955,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 47, + "id": 49, "legend": { "avg": false, "current": false, @@ -23891,7 +24041,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 48, + "id": 50, "legend": { "avg": false, "current": false, @@ -23989,7 +24139,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 49, + "id": 51, "legend": { "avg": false, "current": false, @@ -24066,7 +24216,7 @@ data: "unit": "percentunit" } }, - "id": 50, + "id": 52, "links": [ ], "options": { "legend": { @@ -24096,7 +24246,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 51, + "id": 53, "legend": { "avg": false, "current": false, @@ -24182,7 +24332,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 52, + "id": 54, "legend": { "avg": false, "current": false, @@ -24280,7 +24430,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 53, + "id": 55, "legend": { "avg": false, "current": false, @@ -24366,7 +24516,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 54, + "id": 56, "legend": { "avg": false, "current": false, @@ -24452,7 +24602,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 55, + "id": 57, "legend": { "avg": false, "current": false, @@ -24538,7 +24688,7 @@ data: "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 56, + "id": 58, "legend": { "avg": false, "current": false, @@ -26340,7 +26490,7 @@ data: "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "B" + "refId": "A" }, { "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json index 8fd2eaa8211..02fd5770ea0 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json @@ -721,7 +721,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": true, "steppedLine": false, "targets": [ @@ -796,7 +796,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ @@ -854,14 +854,26 @@ "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Latency (Time in Queue) by Queue Dimension\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", "fill": 1, "id": 12, "legend": { @@ -892,13 +904,151 @@ "format": "time_series", "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "99th Percentile Latency by Queue Dimension", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "B" + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "50th Percentile Latency by Queue Dimension", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", @@ -909,7 +1059,7 @@ "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Latency (Time in Queue) by Queue Dimension", + "title": "Average Latency by Queue Dimension", "tooltip": { "shared": false, "sort": 0, @@ -947,7 +1097,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query-scheduler", + "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", "titleSize": "h6" }, { @@ -961,7 +1111,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 13, + "id": 15, "legend": { "avg": false, "current": false, @@ -1035,7 +1185,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 14, + "id": 16, "legend": { "avg": false, "current": false, @@ -1149,7 +1299,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 15, + "id": 17, "legend": { "avg": false, "current": false, @@ -1223,7 +1373,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 16, + "id": 18, "legend": { "avg": false, "current": false, @@ -1329,7 +1479,7 @@ }, "overrides": [ ] }, - "id": 17, + "id": 19, "links": [ ], "options": { "legend": { @@ -1383,7 +1533,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 18, + "id": 20, "legend": { "avg": false, "current": false, @@ -1457,7 +1607,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 19, + "id": 21, "legend": { "avg": false, "current": false, @@ -1563,7 +1713,7 @@ }, "overrides": [ ] }, - "id": 20, + "id": 22, "links": [ ], "options": { "legend": { @@ -1617,7 +1767,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 21, + "id": 23, "legend": { "avg": false, "current": false, @@ -1691,7 +1841,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 22, + "id": 24, "legend": { "avg": false, "current": false, @@ -1797,7 +1947,7 @@ }, "overrides": [ ] }, - "id": 23, + "id": 25, "links": [ ], "options": { "legend": { @@ -1842,7 +1992,7 @@ "datasource": "$datasource", "description": "### Replicas\nThe maximum, and current number of querier replicas.\nPlease note that the current number of replicas can still show 1 replica even when scaled to 0.\nSince HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.\n\n", "fill": 1, - "id": 24, + "id": 26, "legend": { "avg": false, "current": false, @@ -1944,7 +2094,7 @@ "datasource": "$datasource", "description": "### Scaling metric (desired replicas)\nThis panel shows the result scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints which are applied later.\n\n", "fill": 1, - "id": 25, + "id": 27, "legend": { "avg": false, "current": false, @@ -2024,7 +2174,7 @@ "datasource": "$datasource", "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler woudln't work properly.\n\n", "fill": 1, - "id": 26, + "id": 28, "legend": { "avg": false, "current": false, @@ -2120,7 +2270,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 27, + "id": 29, "legend": { "avg": false, "current": false, @@ -2194,7 +2344,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 28, + "id": 30, "legend": { "avg": false, "current": false, @@ -2292,7 +2442,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 29, + "id": 31, "legend": { "avg": false, "current": false, @@ -2366,7 +2516,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 30, + "id": 32, "legend": { "avg": false, "current": false, @@ -2459,7 +2609,7 @@ "datasource": "$datasource", "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", "fill": 1, - "id": 31, + "id": 33, "legend": { "avg": false, "current": false, @@ -2545,7 +2695,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 32, + "id": 34, "legend": { "avg": false, "current": false, @@ -2619,7 +2769,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 33, + "id": 35, "legend": { "avg": false, "current": false, @@ -2711,7 +2861,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 34, + "id": 36, "legend": { "avg": false, "current": false, @@ -2797,7 +2947,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 35, + "id": 37, "legend": { "avg": false, "current": false, @@ -2871,7 +3021,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 36, + "id": 38, "legend": { "avg": false, "current": false, @@ -2963,7 +3113,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 37, + "id": 39, "legend": { "avg": false, "current": false, @@ -3049,7 +3199,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 38, + "id": 40, "legend": { "avg": false, "current": false, @@ -3123,7 +3273,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 39, + "id": 41, "legend": { "avg": false, "current": false, @@ -3215,7 +3365,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 40, + "id": 42, "legend": { "avg": false, "current": false, @@ -3301,7 +3451,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 41, + "id": 43, "legend": { "avg": false, "current": false, @@ -3378,7 +3528,7 @@ "unit": "percentunit" } }, - "id": 42, + "id": 44, "links": [ ], "options": { "legend": { @@ -3408,7 +3558,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 43, + "id": 45, "legend": { "avg": false, "current": false, @@ -3494,7 +3644,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 44, + "id": 46, "legend": { "avg": false, "current": false, @@ -3592,7 +3742,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 45, + "id": 47, "legend": { "avg": false, "current": false, @@ -3678,7 +3828,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 46, + "id": 48, "legend": { "avg": false, "current": false, @@ -3764,7 +3914,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 47, + "id": 49, "legend": { "avg": false, "current": false, @@ -3850,7 +4000,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 48, + "id": 50, "legend": { "avg": false, "current": false, @@ -3948,7 +4098,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 49, + "id": 51, "legend": { "avg": false, "current": false, @@ -4025,7 +4175,7 @@ "unit": "percentunit" } }, - "id": 50, + "id": 52, "links": [ ], "options": { "legend": { @@ -4055,7 +4205,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 51, + "id": 53, "legend": { "avg": false, "current": false, @@ -4141,7 +4291,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 52, + "id": 54, "legend": { "avg": false, "current": false, @@ -4239,7 +4389,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 53, + "id": 55, "legend": { "avg": false, "current": false, @@ -4325,7 +4475,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 54, + "id": 56, "legend": { "avg": false, "current": false, @@ -4411,7 +4561,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 55, + "id": 57, "legend": { "avg": false, "current": false, @@ -4497,7 +4647,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 56, + "id": 58, "legend": { "avg": false, "current": false, diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json index 8ee0548fb1a..33bc8b469e7 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json @@ -593,7 +593,7 @@ "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "B" + "refId": "A" }, { "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-reads.json b/operations/mimir-mixin-compiled/dashboards/mimir-reads.json index 9bcb00b144a..0d9dd32b986 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-reads.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-reads.json @@ -721,7 +721,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": true, "steppedLine": false, "targets": [ @@ -796,7 +796,7 @@ "renderer": "flot", "seriesOverrides": [ ], "spaceLength": 10, - "span": 4, + "span": 6, "stack": false, "steppedLine": false, "targets": [ @@ -854,14 +854,26 @@ "show": false } ] - }, + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Latency (Time in Queue) by Queue Dimension\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", "fill": 1, "id": 12, "legend": { @@ -892,13 +904,151 @@ "format": "time_series", "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "99th Percentile Latency by Queue Dimension", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "B" + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "50th Percentile Latency by Queue Dimension", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", @@ -909,7 +1059,7 @@ "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Latency (Time in Queue) by Queue Dimension", + "title": "Average Latency by Queue Dimension", "tooltip": { "shared": false, "sort": 0, @@ -947,7 +1097,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query-scheduler", + "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", "titleSize": "h6" }, { @@ -961,7 +1111,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 13, + "id": 15, "legend": { "avg": false, "current": false, @@ -1035,7 +1185,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 14, + "id": 16, "legend": { "avg": false, "current": false, @@ -1149,7 +1299,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 15, + "id": 17, "legend": { "avg": false, "current": false, @@ -1223,7 +1373,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 16, + "id": 18, "legend": { "avg": false, "current": false, @@ -1329,7 +1479,7 @@ }, "overrides": [ ] }, - "id": 17, + "id": 19, "links": [ ], "options": { "legend": { @@ -1383,7 +1533,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 18, + "id": 20, "legend": { "avg": false, "current": false, @@ -1457,7 +1607,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 19, + "id": 21, "legend": { "avg": false, "current": false, @@ -1563,7 +1713,7 @@ }, "overrides": [ ] }, - "id": 20, + "id": 22, "links": [ ], "options": { "legend": { @@ -1617,7 +1767,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 21, + "id": 23, "legend": { "avg": false, "current": false, @@ -1691,7 +1841,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 22, + "id": 24, "legend": { "avg": false, "current": false, @@ -1797,7 +1947,7 @@ }, "overrides": [ ] }, - "id": 23, + "id": 25, "links": [ ], "options": { "legend": { @@ -1842,7 +1992,7 @@ "datasource": "$datasource", "description": "### Replicas\nThe maximum, and current number of querier replicas.\nPlease note that the current number of replicas can still show 1 replica even when scaled to 0.\nSince HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.\n\n", "fill": 1, - "id": 24, + "id": 26, "legend": { "avg": false, "current": false, @@ -1944,7 +2094,7 @@ "datasource": "$datasource", "description": "### Scaling metric (desired replicas)\nThis panel shows the result scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints which are applied later.\n\n", "fill": 1, - "id": 25, + "id": 27, "legend": { "avg": false, "current": false, @@ -2024,7 +2174,7 @@ "datasource": "$datasource", "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler woudln't work properly.\n\n", "fill": 1, - "id": 26, + "id": 28, "legend": { "avg": false, "current": false, @@ -2120,7 +2270,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 27, + "id": 29, "legend": { "avg": false, "current": false, @@ -2194,7 +2344,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 28, + "id": 30, "legend": { "avg": false, "current": false, @@ -2292,7 +2442,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 29, + "id": 31, "legend": { "avg": false, "current": false, @@ -2366,7 +2516,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 30, + "id": 32, "legend": { "avg": false, "current": false, @@ -2459,7 +2609,7 @@ "datasource": "$datasource", "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", "fill": 1, - "id": 31, + "id": 33, "legend": { "avg": false, "current": false, @@ -2545,7 +2695,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 32, + "id": 34, "legend": { "avg": false, "current": false, @@ -2619,7 +2769,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 33, + "id": 35, "legend": { "avg": false, "current": false, @@ -2711,7 +2861,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 34, + "id": 36, "legend": { "avg": false, "current": false, @@ -2797,7 +2947,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 35, + "id": 37, "legend": { "avg": false, "current": false, @@ -2871,7 +3021,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 36, + "id": 38, "legend": { "avg": false, "current": false, @@ -2963,7 +3113,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 37, + "id": 39, "legend": { "avg": false, "current": false, @@ -3049,7 +3199,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 38, + "id": 40, "legend": { "avg": false, "current": false, @@ -3123,7 +3273,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 39, + "id": 41, "legend": { "avg": false, "current": false, @@ -3215,7 +3365,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 40, + "id": 42, "legend": { "avg": false, "current": false, @@ -3301,7 +3451,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 41, + "id": 43, "legend": { "avg": false, "current": false, @@ -3378,7 +3528,7 @@ "unit": "percentunit" } }, - "id": 42, + "id": 44, "links": [ ], "options": { "legend": { @@ -3408,7 +3558,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 43, + "id": 45, "legend": { "avg": false, "current": false, @@ -3494,7 +3644,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 44, + "id": 46, "legend": { "avg": false, "current": false, @@ -3592,7 +3742,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 45, + "id": 47, "legend": { "avg": false, "current": false, @@ -3678,7 +3828,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 46, + "id": 48, "legend": { "avg": false, "current": false, @@ -3764,7 +3914,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 47, + "id": 49, "legend": { "avg": false, "current": false, @@ -3850,7 +4000,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 48, + "id": 50, "legend": { "avg": false, "current": false, @@ -3948,7 +4098,7 @@ "dashes": false, "datasource": "$datasource", "fill": 10, - "id": 49, + "id": 51, "legend": { "avg": false, "current": false, @@ -4025,7 +4175,7 @@ "unit": "percentunit" } }, - "id": 50, + "id": 52, "links": [ ], "options": { "legend": { @@ -4055,7 +4205,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 51, + "id": 53, "legend": { "avg": false, "current": false, @@ -4141,7 +4291,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 52, + "id": 54, "legend": { "avg": false, "current": false, @@ -4239,7 +4389,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 53, + "id": 55, "legend": { "avg": false, "current": false, @@ -4325,7 +4475,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 54, + "id": 56, "legend": { "avg": false, "current": false, @@ -4411,7 +4561,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 55, + "id": 57, "legend": { "avg": false, "current": false, @@ -4497,7 +4647,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "id": 56, + "id": 58, "legend": { "avg": false, "current": false, diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json b/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json index 6d70724ec42..c19c69ce3ba 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json @@ -593,7 +593,7 @@ "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", - "refId": "B" + "refId": "A" }, { "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", diff --git a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet index e4c7d0918f1..4c99d39e032 100644 --- a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet +++ b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet @@ -1008,7 +1008,15 @@ local utils = import 'mixin-utils/utils.libsonnet'; { yaxes: $.yaxes('percentunit') } ), - latencyPanelLabelBreakout(metricName, selector, labels=[], labelReplaceArgSets=[{}], multiplier='1e3'):: + latencyPanelLabelBreakout( + metricName, + selector, + percentiles=['0.99', '0.50'], + includeAverage=true, + labels=[], + labelReplaceArgSets=[{}], + multiplier='1e3', + ):: local averageExprTmpl = $.wrapMultiLabelReplace( query='sum(rate(%s_sum%s[$__rate_interval])) by (%s) * %s / sum(rate(%s_count%s[$__rate_interval])) by (%s)', labelReplaceArgSets=labelReplaceArgSets, @@ -1019,28 +1027,30 @@ local utils = import 'mixin-utils/utils.libsonnet'; ); local labelBreakouts = '%s' % std.join(', ', labels); local histogramLabelBreakouts = '%s' % std.join(', ', ['le'] + labels); + + local percentileTargets = [ + { + expr: histogramExprTmpl % [percentile, metricName, selector, histogramLabelBreakouts, multiplier], + format: 'time_series', + legendFormat: '%sth Percentile: {{ %s }}' % [std.lstripChars(percentile, '0.'), labelBreakouts], + refId: 'A', + } + for percentile in percentiles + ]; + local averageTargets = [ + { + expr: averageExprTmpl % [metricName, selector, labelBreakouts, multiplier, metricName, selector, labelBreakouts], + format: 'time_series', + legendFormat: 'Average: {{ %s }}' % [labelBreakouts], + refId: 'C', + }, + ]; + + local targets = if includeAverage then percentileTargets + averageTargets else percentileTargets; + { nullPointMode: 'null as zero', - targets: [ - { - expr: histogramExprTmpl % ['0.99', metricName, selector, histogramLabelBreakouts, multiplier], - format: 'time_series', - legendFormat: '99th Percentile: {{ %s }}' % [labelBreakouts], - refId: 'A', - }, - { - expr: histogramExprTmpl % ['0.50', metricName, selector, histogramLabelBreakouts, multiplier], - format: 'time_series', - legendFormat: '50th Percentile: {{ %s }}' % [labelBreakouts], - refId: 'B', - }, - { - expr: averageExprTmpl % [metricName, selector, labelBreakouts, multiplier, metricName, selector, labelBreakouts], - format: 'time_series', - legendFormat: 'Average: {{ %s }}' % [labelBreakouts], - refId: 'C', - }, - ], + targets: targets, yaxes: $.yaxes('ms'), }, diff --git a/operations/mimir-mixin/dashboards/reads.libsonnet b/operations/mimir-mixin/dashboards/reads.libsonnet index ea04b88397d..133eae1f718 100644 --- a/operations/mimir-mixin/dashboards/reads.libsonnet +++ b/operations/mimir-mixin/dashboards/reads.libsonnet @@ -160,33 +160,78 @@ local filename = 'mimir-reads.json'; .addPanel( local title = 'Requests / sec'; $.panel(title) + - $.qpsPanel('cortex_query_scheduler_queue_duration_seconds_count{%s}' % $.jobMatcher($._config.job_names.query_scheduler)) + - $.panelDescription(title, description), + $.panelDescription(title, description) + + $.qpsPanel('cortex_query_scheduler_queue_duration_seconds_count{%s}' % $.jobMatcher($._config.job_names.query_scheduler)) ) .addPanel( local title = 'Latency (Time in Queue)'; $.panel(title) + - $.latencyPanel('cortex_query_scheduler_queue_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.query_scheduler)) + - $.panelDescription(title, description), + $.panelDescription(title, description) + + $.latencyPanel('cortex_query_scheduler_queue_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.query_scheduler)) + ) + ) + .addRow( + local description = ||| +

+ The query scheduler can optionally create subqueues + in order to enforce round-robin query queuing fairness + across additional queue dimensions beyond the default. + + By default, query queuing fairness is only applied by tenant ID. + Queries without additional queue dimensions are labeled 'none'. +

+ |||; + local metricName = 'cortex_query_scheduler_queue_duration_seconds'; + local selector = '{%s}' % $.jobMatcher($._config.job_names.query_scheduler); + local labels = ['additional_queue_dimensions']; + local labelReplaceArgSets = [ + { + dstLabel: 'additional_queue_dimensions', + replacement: 'none', + srcLabel: + 'additional_queue_dimensions', + regex: '^$', + }, + ]; + $.row('Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions') + .addPanel( + local title = '99th Percentile Latency by Queue Dimension'; + $.panel(title) + + $.panelDescription(title, description) + + $.latencyPanelLabelBreakout( + metricName=metricName, + selector=selector, + percentiles=['0.99'], + includeAverage=false, + labels=labels, + labelReplaceArgSets=labelReplaceArgSets, + ) ) .addPanel( - local title = 'Latency (Time in Queue) by Queue Dimension'; + local title = '50th Percentile Latency by Queue Dimension'; $.panel(title) + + $.panelDescription(title, description) + $.latencyPanelLabelBreakout( - metricName='cortex_query_scheduler_queue_duration_seconds', - selector='{%s}' % $.jobMatcher($._config.job_names.query_scheduler), - labels=['additional_queue_dimensions'], - labelReplaceArgSets=[ - { - dstLabel: 'additional_queue_dimensions', - replacement: 'none', - srcLabel: - 'additional_queue_dimensions', - regex: '^$', - }, - ] - ) + - $.panelDescription(title, description), + metricName=metricName, + selector=selector, + percentiles=['0.50'], + includeAverage=false, + labels=labels, + labelReplaceArgSets=labelReplaceArgSets, + ) + ) + .addPanel( + local title = 'Average Latency by Queue Dimension'; + $.panel(title) + + $.panelDescription(title, description) + + $.latencyPanelLabelBreakout( + metricName=metricName, + selector=selector, + percentiles=[], + includeAverage=true, + labels=labels, + labelReplaceArgSets=labelReplaceArgSets, + ) ) ) .addRow(