diff --git a/CHANGELOG.md b/CHANGELOG.md index 35869f0266..a0556f5078 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,7 +24,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4576](https://github.com/thanos-io/thanos/pull/4576) UI: add filter compaction level to the Block UI. - [#4731](https://github.com/thanos-io/thanos/pull/4731) Rule: add stateless mode to ruler according to https://thanos.io/tip/proposals-accepted/202005-scalable-rule-storage.md/. Continue https://github.com/thanos-io/thanos/pull/4250. - [#4612](https://github.com/thanos-io/thanos/pull/4612) Sidecar: add `--prometheus.http-client` and `--prometheus.http-client-file` flag for sidecar to connect Prometheus with basic auth or TLS. -- [#4848](https://github.com/thanos-io/thanos/pull/4848) Compactor: added Prometheus metric for tracking the progress of retention. +- [#4856](https://github.com/thanos-io/thanos/pull/4856) Mixin: Add Query Frontend Grafana dashboard. ### Fixed diff --git a/examples/dashboards/dashboards.md b/examples/dashboards/dashboards.md index 4fae5a1a1c..4cff60678d 100644 --- a/examples/dashboards/dashboards.md +++ b/examples/dashboards/dashboards.md @@ -5,6 +5,7 @@ There exists Grafana dashboards for each component (not all of them complete) ta - [Thanos Overview](overview.json) - [Thanos Compact](compact.json) - [Thanos Querier](query.json) +- [Thanos Query Frontend](queryFrontend.json) - [Thanos Store](store.json) - [Thanos Receiver](receive.json) - [Thanos Sidecar](sidecar.json) diff --git a/examples/dashboards/queryFrontend.json b/examples/dashboards/queryFrontend.json new file mode 100644 index 0000000000..07b4b33346 --- /dev/null +++ b/examples/dashboards/queryFrontend.json @@ -0,0 +1,1117 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of requests against Query Frontend for the given time.", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, handler, code) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{handler}} {{code}}", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of queries passing through Query Frontend", + "fill": 10, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/1../", + "color": "#EAB839" + }, + { + "alias": "/2../", + "color": "#37872D" + }, + { + "alias": "/3../", + "color": "#E0B400" + }, + { + "alias": "/4../", + "color": "#1F60C4" + }, + { + "alias": "/5../", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, handler, code) (rate(thanos_query_frontend_queries_total{job=~\"$job\", op=\"query_range\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{handler}} {{code}}", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of queries", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "error": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows ratio of errors compared to the the total number of handled requests against Query Frontend.", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "error", + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Errors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows how long has it taken to handle requests in quantiles.", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "p99", + "color": "#FA6400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p90", + "color": "#E0B400", + "fill": 1, + "fillGradient": 1 + }, + { + "alias": "p50", + "color": "#37872D", + "fill": 10, + "fillGradient": 0 + } + ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~\"$job\", handler=\"query-frontend\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p50 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "step": 10 + }, + { + "expr": "histogram_quantile(0.90, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~\"$job\", handler=\"query-frontend\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p90 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "step": 10 + }, + { + "expr": "histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~\"$job\", handler=\"query-frontend\"}[$interval]))) * 1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "p99 {{job}}", + "logBase": 10, + "max": null, + "min": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query Frontend API", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show rate of cache requests.", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(cortex_cache_request_duration_seconds_count{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show rate of Querier cache gets vs misses.", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(querier_cache_gets_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cache gets - {{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum by (job, tripperware) (rate(querier_cache_misses_total{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cache misses - {{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Querier cache gets vs misses", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of cortex fetched keys.", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(cortex_cache_fetched_keys{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Cortex fetched keys", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows rate of cortex cache hits.", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (job, tripperware) (rate(cortex_cache_hits{job=~\"$job\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{job}} {{tripperware}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Cortex cache hits", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cache Operations", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_alloc_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc all {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "go_memstats_heap_alloc_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc heap {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "rate(go_memstats_alloc_bytes_total{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc rate all {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "rate(go_memstats_heap_alloc_bytes{job=~\"$job\"}[30s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "alloc rate heap {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "go_memstats_stack_inuse_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inuse heap {{instance}}", + "legendLink": null, + "step": 10 + }, + { + "expr": "go_memstats_heap_inuse_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inuse stack {{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Used", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Goroutines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_gc_duration_seconds{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{quantile}} {{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "GC Time Quantiles", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Resources", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "thanos-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 300, + "auto_min": "10s", + "current": { + "text": "5m", + "value": "5m" + }, + "hide": 0, + "label": "interval", + "name": "interval", + "query": "5m,10m,30m,1h,6h,12h", + "refresh": 2, + "type": "interval" + }, + { + "allValue": null, + "current": { + "text": "all", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": false, + "name": "job", + "options": [ ], + "query": "label_values(up{job=~\".*thanos-query-frontend.*\"}, job)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "UTC", + "title": "Thanos / Query Frontend", + "uid": "9bc9f8bb21d4d18193c3fe772b36c306", + "version": 0 +} diff --git a/mixin/README.md b/mixin/README.md index baef01946c..1ecedabb95 100644 --- a/mixin/README.md +++ b/mixin/README.md @@ -88,6 +88,10 @@ This project is intended to be used as a library. You can extend and customize d selector: 'job=~".*thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, + queryFrontend+:: { + selector: 'job=~".*thanos-query-frontend.*"', + title: '%(prefix)sQuery Frontend' % $.dashboard.prefix, + }, store+:: { selector: 'job=~".*thanos-store.*"', title: '%(prefix)sStore' % $.dashboard.prefix, diff --git a/mixin/config.libsonnet b/mixin/config.libsonnet index e4d415d5ef..55e4e9cb17 100644 --- a/mixin/config.libsonnet +++ b/mixin/config.libsonnet @@ -28,6 +28,10 @@ selector: 'job=~".*thanos-query.*"', title: '%(prefix)sQuery' % $.dashboard.prefix, }, + queryFrontend+:: { + selector: 'job=~".*thanos-query-frontend.*"', + title: '%(prefix)sQuery Frontend' % $.dashboard.prefix, + }, store+:: { selector: 'job=~".*thanos-store.*"', title: '%(prefix)sStore' % $.dashboard.prefix, diff --git a/mixin/dashboards/dashboards.libsonnet b/mixin/dashboards/dashboards.libsonnet index 5bd99093f5..d35bbcd843 100644 --- a/mixin/dashboards/dashboards.libsonnet +++ b/mixin/dashboards/dashboards.libsonnet @@ -1,4 +1,5 @@ (import 'query.libsonnet') + +(import 'query_frontend.libsonnet') + (import 'store.libsonnet') + (import 'sidecar.libsonnet') + (import 'receive.libsonnet') + diff --git a/mixin/dashboards/query_frontend.libsonnet b/mixin/dashboards/query_frontend.libsonnet new file mode 100644 index 0000000000..136f7405f4 --- /dev/null +++ b/mixin/dashboards/query_frontend.libsonnet @@ -0,0 +1,82 @@ +local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; +local utils = import '../lib/utils.libsonnet'; + +{ + local thanos = self, + queryFrontend+:: { + selector: error 'must provide selector for Thanos Query Frontend dashboard', + title: error 'must provide title for Thanos Query Frontend dashboard', + dashboard:: { + selector: std.join(', ', thanos.dashboard.selector + ['job=~"$job"']), + dimensions: std.join(', ', thanos.dashboard.dimensions + ['job']), + }, + }, + grafanaDashboards+:: { + [if thanos.queryFrontend != null then 'queryFrontend.json']: + local queryFrontendHandlerSelector = utils.joinLabels([thanos.queryFrontend.dashboard.selector, 'handler="query-frontend"']); + local queryFrontendTripperwareSelector = utils.joinLabels([thanos.queryFrontend.dashboard.selector, 'tripperware="query_range"']); + local queryFrontendOpSelector = utils.joinLabels([thanos.queryFrontend.dashboard.selector, 'op="query_range"']); + g.dashboard(thanos.queryFrontend.title) + .addRow( + g.row('Query Frontend API') + .addPanel( + g.panel('Rate of requests', 'Shows rate of requests against Query Frontend for the given time.') + + g.httpQpsPanel('http_requests_total', queryFrontendHandlerSelector, thanos.queryFrontend.dashboard.dimensions) + ) + .addPanel( + g.panel('Rate of queries', 'Shows rate of queries passing through Query Frontend') + + g.httpQpsPanel('thanos_query_frontend_queries_total', queryFrontendOpSelector, thanos.queryFrontend.dashboard.dimensions) + ) + .addPanel( + g.panel('Errors', 'Shows ratio of errors compared to the the total number of handled requests against Query Frontend.') + + g.httpErrPanel('http_requests_total', queryFrontendHandlerSelector, thanos.queryFrontend.dashboard.dimensions) + ) + .addPanel( + g.panel('Duration', 'Shows how long has it taken to handle requests in quantiles.') + + g.latencyPanel('http_request_duration_seconds', queryFrontendHandlerSelector, thanos.queryFrontend.dashboard.dimensions) + ) + ) + .addRow( + g.row('Cache Operations') + .addPanel( + g.panel('Requests', 'Show rate of cache requests.') + + g.queryPanel( + 'sum by (%s) (rate(cortex_cache_request_duration_seconds_count{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], + '{{job}} {{tripperware}}', + ) + + g.stack + ) + .addPanel( + g.panel('Querier cache gets vs misses', 'Show rate of Querier cache gets vs misses.') + + g.queryPanel( + 'sum by (%s) (rate(querier_cache_gets_total{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], + 'Cache gets - {{job}} {{tripperware}}', + ) + + g.queryPanel( + 'sum by (%s) (rate(querier_cache_misses_total{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], + 'Cache misses - {{job}} {{tripperware}}', + ) + + g.stack + ) + .addPanel( + g.panel('Cortex fetched keys', 'Shows rate of cortex fetched keys.') + + g.queryPanel( + 'sum by (%s) (rate(cortex_cache_fetched_keys{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], + '{{job}} {{tripperware}}', + ) + + g.stack + ) + .addPanel( + g.panel('Cortex cache hits', 'Shows rate of cortex cache hits.') + + g.queryPanel( + 'sum by (%s) (rate(cortex_cache_hits{%s}[$interval]))' % [utils.joinLabels([thanos.queryFrontend.dashboard.dimensions, 'tripperware']), thanos.queryFrontend.dashboard.selector], + '{{job}} {{tripperware}}', + ) + + g.stack + ) + ) + .addRow( + g.resourceUtilizationRow(thanos.queryFrontend.dashboard.selector, thanos.queryFrontend.dashboard.dimensions) + ), + }, +}