From b2222e2c8c821d11aa7303b13e66535f0865a8b4 Mon Sep 17 00:00:00 2001 From: Omer Aplatony Date: Thu, 18 Jul 2024 20:26:07 +0300 Subject: [PATCH] helm: add configurable liveness&readiness probes for master topology-updater and worker Signed-off-by: Omer Aplatony --- .../templates/master.yaml | 11 +--- .../templates/topologyupdater.yaml | 11 +--- .../templates/worker.yaml | 11 +--- .../helm/node-feature-discovery/values.yaml | 57 +++++++++++-------- docs/deployment/helm.md | 10 +++- 5 files changed, 48 insertions(+), 52 deletions(-) diff --git a/deployment/helm/node-feature-discovery/templates/master.yaml b/deployment/helm/node-feature-discovery/templates/master.yaml index 46682ceed4..1f16f6c7d6 100644 --- a/deployment/helm/node-feature-discovery/templates/master.yaml +++ b/deployment/helm/node-feature-discovery/templates/master.yaml @@ -46,16 +46,9 @@ spec: image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} livenessProbe: - grpc: - port: 8082 - initialDelaySeconds: 10 - periodSeconds: 10 + {{- toYaml .Values.master.livenessProbe | nindent 12 }} readinessProbe: - grpc: - port: 8082 - initialDelaySeconds: 5 - periodSeconds: 10 - failureThreshold: 10 + {{- toYaml .Values.master.readinessProbe | nindent 12 }} ports: - containerPort: {{ .Values.master.port | default "8080" }} name: grpc diff --git a/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml b/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml index 8991f5fbf6..a94aac8701 100644 --- a/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml +++ b/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml @@ -43,16 +43,9 @@ spec: image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: "{{ .Values.image.pullPolicy }}" livenessProbe: - grpc: - port: 8082 - initialDelaySeconds: 10 - periodSeconds: 10 + {{- toYaml 
.Values.topologyUpdater.livenessProbe | nindent 10 }} readinessProbe: - grpc: - port: 8082 - initialDelaySeconds: 5 - periodSeconds: 10 - failureThreshold: 10 + {{- toYaml .Values.topologyUpdater.readinessProbe | nindent 10 }} env: - name: NODE_NAME valueFrom: diff --git a/deployment/helm/node-feature-discovery/templates/worker.yaml b/deployment/helm/node-feature-discovery/templates/worker.yaml index 48512eed9c..2aae584746 100644 --- a/deployment/helm/node-feature-discovery/templates/worker.yaml +++ b/deployment/helm/node-feature-discovery/templates/worker.yaml @@ -45,16 +45,9 @@ spec: image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} livenessProbe: - grpc: - port: 8082 - initialDelaySeconds: 10 - periodSeconds: 10 + {{- toYaml .Values.worker.livenessProbe | nindent 12 }} readinessProbe: - grpc: - port: 8082 - initialDelaySeconds: 5 - periodSeconds: 10 - failureThreshold: 10 + {{- toYaml .Values.worker.readinessProbe | nindent 12 }} env: - name: NODE_NAME valueFrom: diff --git a/deployment/helm/node-feature-discovery/values.yaml b/deployment/helm/node-feature-discovery/values.yaml index 6a40df698e..524421a058 100644 --- a/deployment/helm/node-feature-discovery/values.yaml +++ b/deployment/helm/node-feature-discovery/values.yaml @@ -140,6 +140,19 @@ master: - key: "node-role.kubernetes.io/control-plane" operator: In values: [""] + + livenessProbe: + grpc: + port: 8082 + initialDelaySeconds: 10 + # failureThreshold: 3 + # periodSeconds: 10 + readinessProbe: + grpc: + port: 8082 + initialDelaySeconds: 5 + failureThreshold: 10 + # periodSeconds: 10 worker: enable: true @@ -401,19 +414,18 @@ worker: runAsNonRoot: true # runAsUser: 1000 - # livenessProbe: {} - ## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation. 
- # grpc: - # port: 8082 - # initialDelaySeconds: 10 + livenessProbe: + grpc: + port: 8082 + initialDelaySeconds: 10 + # failureThreshold: 3 # periodSeconds: 10 - # readinessProbe: {} - ## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation. - # grpc: - # port: 8082 - # initialDelaySeconds: 5 + readinessProbe: + grpc: + port: 8082 + initialDelaySeconds: 5 + failureThreshold: 10 # periodSeconds: 10 - # failureThreshold: 10 serviceAccount: # Specifies whether a service account should be created. @@ -492,20 +504,19 @@ topologyUpdater: drop: [ "ALL" ] readOnlyRootFilesystem: true runAsUser: 0 - - # livenessProbe: {} - ## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation. - # grpc: - # port: 8082 - # initialDelaySeconds: 10 + + livenessProbe: + grpc: + port: 8082 + initialDelaySeconds: 10 + # failureThreshold: 3 # periodSeconds: 10 - # readinessProbe: {} - ## NOTE: Currently not configurable, defaults are provided for the sake of extra documentation. - # grpc: - # port: 8082 - # initialDelaySeconds: 5 + readinessProbe: + grpc: + port: 8082 + initialDelaySeconds: 5 + failureThreshold: 10 # periodSeconds: 10 - # failureThreshold: 10 resources: limits: diff --git a/docs/deployment/helm.md b/docs/deployment/helm.md index decacda5b3..9465a2bde0 100644 --- a/docs/deployment/helm.md +++ b/docs/deployment/helm.md @@ -144,6 +144,8 @@ API's you need to install the prometheus operator in your cluster. | `master.config` | dict | | NFD master [configuration](../reference/master-configuration-reference) | | `master.args` | array | [] | Additional [command line arguments](../reference/master-commandline-reference.md) to pass to nfd-master | | `master.revisionHistoryLimit` | integer | | Specify how many old ReplicaSets for this Deployment you want to retain. 
[revisionHistoryLimit](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#revision-history-limit) | +| `master.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | NFD master pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) | +| `master.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | NFD master pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe)| ### Worker pod parameters @@ -168,7 +170,9 @@ API's you need to install the prometheus operator in your cluster. | `worker.annotations` | dict | {} | NFD worker pod [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) | | `worker.daemonsetAnnotations` | dict | {} | NFD worker daemonset [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) | | `worker.args` | array | [] | Additional [command line arguments](../reference/worker-commandline-reference.md) to pass to nfd-worker | -| `worker.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/#DaemonSetSpec) | +| `worker.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. 
[revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/#DaemonSetSpec) | +| `worker.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | NFD worker pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) | +| `worker.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | NFD worker pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe)| ### Topology updater parameters @@ -199,7 +203,9 @@ API's you need to install the prometheus operator in your cluster. | `topologyUpdater.podSetFingerprint` | bool | true | Enables compute and report of pod fingerprint in NRT objects. | | `topologyUpdater.kubeletStateDir` | string | /var/lib/kubelet | Specifies kubelet state directory path for watching state and checkpoint files. Empty value disables kubelet state tracking. | | `topologyUpdater.args` | array | [] | Additional [command line arguments](../reference/topology-updater-commandline-reference.md) to pass to nfd-topology-updater | -| `topologyUpdater.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. [revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/#DaemonSetSpec) | +| `topologyUpdater.revisionHistoryLimit` | integer | | Specify how many old ControllerRevisions for this DaemonSet you want to retain. 
[revisionHistoryLimit](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/daemon-set-v1/#DaemonSetSpec) | +| `topologyUpdater.livenessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":10} | Topology updater pod [liveness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#liveness-probe) | +| `topologyUpdater.readinessProbe` | dict | {"grpc":{"port":8082},"initialDelaySeconds":5,"failureThreshold": 10} | Topology updater pod [readiness probe](https://kubernetes.io/docs/concepts/configuration/liveness-readiness-startup-probes/#readiness-probe)| ### Garbage collector parameters