Skip to content

Commit

Permalink
Fix kubernetes OOTB dashboard and monitors (#10660)
Browse files (browse the repository at this point in the history)
* Replace `replicaset` by `kube_replica_set` tag on pod metrics.
* Replace `phase` by `pod_phase` tag on pod metrics.
  • Loading branch information
clamoriniere committed Nov 17, 2021
1 parent 2f21806 commit 7ec881f
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion kubernetes/assets/dashboards/kubernetes_clusters.json
Original file line number Diff line number Diff line change
Expand Up @@ -3243,7 +3243,7 @@
"value": 0
}
],
"q": "top(sum:kubernetes_state.pod.status_phase{*,$cluster,!phase:running,!phase:succeeded,$scope} by {cluster_name,kube_namespace,phase}, 100, 'last', 'desc')"
"q": "top(sum:kubernetes_state.pod.status_phase{*,$cluster,!pod_phase:running,!pod_phase:succeeded,$scope} by {cluster_name,kube_namespace,pod_phase}, 100, 'last', 'desc')"
}
],
"title": "Pods in bad phase by namespaces",
Expand Down
6 changes: 3 additions & 3 deletions kubernetes/assets/dashboards/kubernetes_dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@
"type": "timeseries",
"requests": [
{
"q": "sum:kubernetes_state.replicaset.replicas_ready{$scope,$daemonset,$service,$namespace,$deployment,$label,$cluster,$node} by {replicaset}",
"q": "sum:kubernetes_state.replicaset.replicas_ready{$scope,$daemonset,$service,$namespace,$deployment,$label,$cluster,$node} by {kube_replica_set}",
"display_type": "area",
"style": {
"palette": "purple",
Expand Down Expand Up @@ -930,7 +930,7 @@
"type": "timeseries",
"requests": [
{
"q": "sum:kubernetes_state.replicaset.replicas_desired{$scope,$daemonset,$service,$namespace,$deployment,$label,$cluster,$node} by {replicaset}-sum:kubernetes_state.replicaset.replicas_ready{$scope,$daemonset,$service,$namespace,$deployment,$label,$cluster,$node} by {replicaset}",
"q": "sum:kubernetes_state.replicaset.replicas_desired{$scope,$daemonset,$service,$namespace,$deployment,$label,$cluster,$node} by {kube_replica_set}-sum:kubernetes_state.replicaset.replicas_ready{$scope,$daemonset,$service,$namespace,$deployment,$label,$cluster,$node} by {kube_replica_set}",
"display_type": "area",
"style": {
"palette": "orange",
Expand Down Expand Up @@ -1245,7 +1245,7 @@
"type": "toplist",
"requests": [
{
"q": "top(sum:kubernetes_state.pod.status_phase{$scope,$cluster,$namespace,$deployment,$daemonset,!phase:running,!phase:succeeded,$label,$node,$service} by {kube_cluster_name,kube_namespace,phase}, 100, 'last', 'desc')",
"q": "top(sum:kubernetes_state.pod.status_phase{$scope,$cluster,$namespace,$deployment,$daemonset,!pod_phase:running,!pod_phase:succeeded,$label,$node,$service} by {kube_cluster_name,kube_namespace,pod_phase}, 100, 'last', 'desc')",
"conditional_formats": [
{
"comparator": ">",
Expand Down
2 changes: 1 addition & 1 deletion kubernetes/assets/dashboards/kubernetes_pods.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
"response_format": "scalar",
"queries": [
{
"query": "sum:kubernetes_state.pod.status_phase{$scope,$cluster,$namespace,$deployment,$statefulset,$daemonset,$job,$cronjob,!phase:running,!phase:succeeded} by {phase,kube_namespace}",
"query": "sum:kubernetes_state.pod.status_phase{$scope,$cluster,$namespace,$deployment,$statefulset,$daemonset,$job,$cronjob,!pod_phase:running,!pod_phase:succeeded} by {pod_phase,kube_namespace}",
"data_source": "metrics",
"name": "query1",
"aggregator": "last"
Expand Down
2 changes: 1 addition & 1 deletion kubernetes/assets/monitors/monitor_pods_failed_state.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "[kubernetes] Monitor Kubernetes Failed Pods in Namespaces",
"type": "query alert",
"query": "change(avg(last_5m),last_5m):sum:kubernetes_state.pod.status_phase{phase:failed} by {kube_cluster_name,kube_namespace} > 10",
"query": "change(avg(last_5m),last_5m):sum:kubernetes_state.pod.status_phase{pod_phase:failed} by {kube_cluster_name,kube_namespace} > 10",
"message": "More than ten pods are failing in ({{kube_cluster_name.name}} cluster). \n The threshold of ten pods varies depending on your infrastructure. Change the threshold to suit your needs.",
"tags": [
"integration:kubernetes"
Expand Down

0 comments on commit 7ec881f

Please sign in to comment.