Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

operator: Add dynamic hooks for graceful restarts #4907

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/go/k8s/apis/redpanda/v1alpha1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ type ClusterSpec struct {
type RestartConfig struct {
	// DisableMaintenanceModeHooks deactivates the preStop and postStart hooks that force nodes to enter maintenance mode when stopping and exit maintenance mode when up again.
	DisableMaintenanceModeHooks *bool `json:"disableMaintenanceModeHooks,omitempty"`
	// DisableReadinessProbe deactivates the readiness probe that verifies the state of each node by querying the Redpanda admin API.
	DisableReadinessProbe *bool `json:"disableReadinessProbe,omitempty"`
	// DisableClusterHealthCheck deactivates the wait for cluster health when restarting.
	DisableClusterHealthCheck *bool `json:"disableClusterHealthCheck,omitempty"`
	// HealthCheckTimeoutSeconds configures the maximum time to wait for the cluster to become healthy before giving up.
	// NOTE(review): behavior on timeout (fail vs. proceed with restart) is decided by the consumer of this field — not visible here; confirm before documenting further.
	HealthCheckTimeoutSeconds *int32 `json:"healthCheckTimeoutSeconds,omitempty"`
}

// PDBConfig specifies how the PodDisruptionBudget should be created for the
Expand Down Expand Up @@ -844,6 +850,24 @@ func (r *Cluster) IsUsingMaintenanceModeHooks() bool {
return true
}

// IsUsingReadinessProbe tells if the cluster is configured to use the readiness probe on the pods.
// The probe is enabled by default; it is off only when explicitly disabled in the restart config.
func (r *Cluster) IsUsingReadinessProbe() bool {
	cfg := r.Spec.RestartConfig
	if cfg == nil || cfg.DisableReadinessProbe == nil {
		// nothing explicitly set: default to enabled
		return true
	}
	return !*cfg.DisableReadinessProbe
}

// IsUsingClusterHealthCheck tells if the cluster is configured to use wait for cluster health when restarting.
// The health check is enabled by default; it is off only when explicitly disabled in the restart config.
func (r *Cluster) IsUsingClusterHealthCheck() bool {
	cfg := r.Spec.RestartConfig
	if cfg == nil || cfg.DisableClusterHealthCheck == nil {
		// nothing explicitly set: default to enabled
		return true
	}
	return !*cfg.DisableClusterHealthCheck
}

// ClusterStatus

// IsRestarting tells if the cluster is restarting due to a change in configuration or an upgrade in progress
Expand Down
31 changes: 31 additions & 0 deletions src/go/k8s/apis/redpanda/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -652,11 +652,26 @@ spec:
description: RestartConfig allows to control the behavior of the cluster
when restarting
properties:
disableClusterHealthCheck:
description: DisableClusterHealthCheck deactivates the wait for
cluster health when restarting
type: boolean
disableMaintenanceModeHooks:
description: DisableMaintenanceModeHooks deactivates the preStop
and postStart hooks that force nodes to enter maintenance mode
when stopping and exit maintenance mode when up again
type: boolean
disableReadinessProbe:
description: DisableReadinessProbe deactivates the readiness probe
that verifies the state of each node by querying the Redpanda
admin API
type: boolean
healthCheckTimeoutSeconds:
description: HealthCheckTimeoutSeconds configures the maximum
time to wait for the cluster to become healthy before giving
up
format: int32
type: integer
type: object
sidecars:
description: Sidecars is list of sidecars run alongside redpanda container
Expand Down
3 changes: 3 additions & 0 deletions src/go/k8s/controllers/redpanda/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ func (r *ClusterReconciler) Reconcile(
sa := resources.NewServiceAccount(r.Client, &redpandaCluster, r.Scheme, log)
configMapResource := resources.NewConfigMap(r.Client, &redpandaCluster, r.Scheme, headlessSvc.HeadlessServiceFQDN(r.clusterDomain), proxySuKey, schemaRegistrySuKey, log)

hooks := resources.NewHooksConfigMap(r.Client, &redpandaCluster, r.Scheme, log)

sts := resources.NewStatefulSet(
r.Client,
&redpandaCluster,
Expand Down Expand Up @@ -173,6 +175,7 @@ func (r *ClusterReconciler) Reconcile(
resources.NewClusterRole(r.Client, &redpandaCluster, r.Scheme, log),
crb,
resources.NewPDB(r.Client, &redpandaCluster, r.Scheme, log),
hooks,
sts,
}

Expand Down
41 changes: 16 additions & 25 deletions src/go/k8s/controllers/redpanda/cluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,6 @@ var _ = Describe("RedPandaCluster controller", func() {
Entry("Random image pull policy", "asdvasd", Not(Succeed())))

Context("Populating stable condition", func() {

It("Marks the cluster as unstable until it gets enough instances", func() {
By("Allowing creation of a new cluster")
key, _, redpandaCluster := getInitialTestCluster("initial")
Expand All @@ -753,7 +752,7 @@ var _ = Describe("RedPandaCluster controller", func() {
Eventually(clusterStableConditionStatusGetter(key), timeout, interval).Should(Equal(corev1.ConditionFalse))

By("Allowing creation of pods")
Expect(createClusterPod(redpandaCluster, "initial-0", true)).To(Succeed())
Expect(createReadyClusterPod(redpandaCluster, "initial-0")).To(Succeed())

By("Setting the unstable condition to true")
Eventually(clusterStableConditionStatusGetter(key), timeout, interval).Should(Equal(corev1.ConditionTrue))
Expand All @@ -773,9 +772,9 @@ var _ = Describe("RedPandaCluster controller", func() {
Eventually(clusterStableConditionStatusGetter(key), timeout, interval).Should(Equal(corev1.ConditionFalse))

By("Allowing creation of pods")
Expect(createClusterPod(redpandaCluster, "multi-instance-0", true)).To(Succeed())
Expect(createClusterPod(redpandaCluster, "multi-instance-1", true)).To(Succeed())
Expect(createClusterPod(redpandaCluster, "multi-instance-2", true)).To(Succeed())
Expect(createReadyClusterPod(redpandaCluster, "multi-instance-0")).To(Succeed())
Expect(createReadyClusterPod(redpandaCluster, "multi-instance-1")).To(Succeed())
Expect(createReadyClusterPod(redpandaCluster, "multi-instance-2")).To(Succeed())

By("Setting the unstable condition to true")
Eventually(clusterStableConditionStatusGetter(key), timeout, interval).Should(Equal(corev1.ConditionTrue))
Expand All @@ -799,7 +798,6 @@ var _ = Describe("RedPandaCluster controller", func() {
By("Deleting the cluster")
Expect(k8sClient.Delete(context.Background(), redpandaCluster)).Should(Succeed())
})

})
})

Expand Down Expand Up @@ -848,10 +846,8 @@ func clusterStableConditionStatusGetter(
}
}

func createClusterPod(
cluster *v1alpha1.Cluster,
name string,
ready bool,
func createReadyClusterPod(
cluster *v1alpha1.Cluster, name string,
) error {
pod := corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -868,28 +864,23 @@ func createClusterPod(
},
},
}
println("create", cluster.Namespace, name)
err := k8sClient.Create(context.Background(), &pod)
if err != nil {
return err
}
if ready {
pod.Status.Conditions = append(pod.Status.Conditions, corev1.PodCondition{
Type: corev1.PodReady,
Status: corev1.ConditionTrue,
})
err = k8sClient.Status().Update(context.Background(), &pod)
if err != nil {
return err
}
pod.Status.Conditions = append(pod.Status.Conditions, corev1.PodCondition{
Type: corev1.PodReady,
Status: corev1.ConditionTrue,
})
err = k8sClient.Status().Update(context.Background(), &pod)
if err != nil {
return err
}
return forceReconciliation(cluster)
}

func setClusterPodReadiness(
cluster *v1alpha1.Cluster,
name string,
ready bool,
cluster *v1alpha1.Cluster, name string, ready bool,
) error {
var pod corev1.Pod
err := k8sClient.Get(context.Background(), types.NamespacedName{Namespace: cluster.Namespace, Name: name}, &pod)
Expand All @@ -902,7 +893,7 @@ func setClusterPodReadiness(
newStatus = corev1.ConditionTrue
}

var existing = false
existing := false
for i := range pod.Status.Conditions {
if pod.Status.Conditions[i].Type == corev1.PodReady {
existing = true
Expand Down Expand Up @@ -934,7 +925,7 @@ func forceReconciliation(cluster *v1alpha1.Cluster) error {
return err
}
// Change just to trigger reconciliation, but unrelated to the use case
cluster.Spec.CloudStorage.APIEndpointPort = cluster.Spec.CloudStorage.APIEndpointPort + 1
cluster.Spec.CloudStorage.APIEndpointPort++
return k8sClient.Update(context.Background(), cluster)
})
}
32 changes: 32 additions & 0 deletions src/go/k8s/pkg/resources/featuregates/health_overview.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2022 Redpanda Data, Inc.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.md
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0

package featuregates

import "github.com/Masterminds/semver/v3"

const (
	// Version components of the first Redpanda release covered by the
	// HealthOverview feature gate (22.1); see HealthOverview below.
	healthOverviewMajor = uint64(22)
	healthOverviewMinor = uint64(1)
)

// HealthOverview reports whether the given Redpanda version supports the
// health overview feature. This feature gate should be removed when the
// operator no longer supports versions older than 22.1.
func HealthOverview(version string) bool {
	// The development version always contains this feature.
	if version == devVersion {
		return true
	}
	parsed, err := semver.NewVersion(version)
	if err != nil {
		// unparsable versions are assumed not to have the feature
		return false
	}

	switch {
	case parsed.Major() > healthOverviewMajor:
		return true
	case parsed.Major() == healthOverviewMajor:
		return parsed.Minor() >= healthOverviewMinor
	default:
		return false
	}
}
Loading