operator: Add dynamic hooks for graceful restarts #4907

Closed · wants to merge 5 commits
38 changes: 37 additions & 1 deletion src/go/k8s/apis/redpanda/v1alpha1/cluster_types.go
@@ -158,6 +158,12 @@ type ClusterSpec struct {
type RestartConfig struct {
	// DisableMaintenanceModeHooks deactivates the preStop and postStart hooks that force nodes to enter maintenance mode when stopping and exit maintenance mode when up again
	DisableMaintenanceModeHooks *bool `json:"disableMaintenanceModeHooks,omitempty"`
	// DisableReadinessProbe deactivates the readiness probe that verifies the state of each node by querying the Redpanda admin API
	DisableReadinessProbe *bool `json:"disableReadinessProbe,omitempty"`
	// DisableClusterHealthCheck deactivates the wait for cluster health when restarting
	DisableClusterHealthCheck *bool `json:"disableClusterHealthCheck,omitempty"`
	// HealthCheckTimeoutSeconds configures the maximum time to wait for the cluster to become healthy before giving up
	HealthCheckTimeoutSeconds *int32 `json:"healthCheckTimeoutSeconds,omitempty"`
}
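For orientation, a minimal sketch of how these options could be populated from Go code (illustrative only; exampleRestartConfig is a hypothetical helper assumed to live in the same v1alpha1 package, not part of this PR):

// exampleRestartConfig keeps every restart safeguard enabled and caps the
// cluster-health wait at 10 minutes. Illustrative only.
func exampleRestartConfig() *RestartConfig {
	disabled := false
	timeout := int32(600)
	return &RestartConfig{
		DisableMaintenanceModeHooks: &disabled, // keep the maintenance-mode preStop/postStart hooks
		DisableReadinessProbe:       &disabled, // keep the admin-API readiness probe
		DisableClusterHealthCheck:   &disabled, // keep the wait for cluster health on restarts
		HealthCheckTimeoutSeconds:   &timeout,  // stop waiting for health after 600 seconds
	}
}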

// PDBConfig specifies how the PodDisruptionBudget should be created for the
@@ -332,13 +338,15 @@ type ClusterCondition struct {
}

// ClusterConditionType is a valid value for ClusterCondition.Type
// +kubebuilder:validation:Enum=ClusterConfigured
// +kubebuilder:validation:Enum=ClusterConfigured;ClusterStable
type ClusterConditionType string

// These are valid conditions of the cluster.
const (
	// ClusterConfiguredConditionType indicates whether the Redpanda cluster configuration is in sync with the desired one
	ClusterConfiguredConditionType ClusterConditionType = "ClusterConfigured"
	// ClusterStableConditionType is a stability indicator for the cluster that estimates if the cluster can reach quorum in its current configuration
	ClusterStableConditionType ClusterConditionType = "ClusterStable"
)

// GetCondition returns the condition of the given type
@@ -416,6 +424,16 @@ const (
	ClusterConfiguredReasonError = "Error"
)

// These are valid reasons for ClusterStable
const (
	// ClusterStableNotEnoughInstances indicates that the cluster is running with fewer ready instances than the minimum needed to reach quorum
	ClusterStableNotEnoughInstances = "NotEnoughInstances"
	// ClusterStableRecovering indicates that the cluster has been in an unstable state and is returning to normal.
	// A cluster enters this state once the minimum number of instances needed for quorum is ready; the transition
	// to full stability happens only when all nodes reach the ready state.
	ClusterStableRecovering = "Recovering"
)

// NodesList shows where clients of the Cluster custom resource can reach
// the various listeners of the Redpanda cluster
type NodesList struct {
@@ -832,6 +850,24 @@ func (r *Cluster) IsUsingMaintenanceModeHooks() bool {
	return true
}

// IsUsingReadinessProbe tells if the cluster is configured to use the readiness probe on the pods.
func (r *Cluster) IsUsingReadinessProbe() bool {
	// enabled unless explicitly stated
	if r.Spec.RestartConfig != nil && r.Spec.RestartConfig.DisableReadinessProbe != nil {
		return !*r.Spec.RestartConfig.DisableReadinessProbe
	}
	return true
}

// IsUsingClusterHealthCheck tells if the cluster is configured to wait for cluster health when restarting.
func (r *Cluster) IsUsingClusterHealthCheck() bool {
	// enabled unless explicitly stated
	if r.Spec.RestartConfig != nil && r.Spec.RestartConfig.DisableClusterHealthCheck != nil {
		return !*r.Spec.RestartConfig.DisableClusterHealthCheck
	}
	return true
}
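As a quick illustration of the "enabled unless explicitly disabled" defaults encoded above (a sketch, not part of the PR; it assumes an fmt import in the same package):

// exampleRestartDefaults shows that a Cluster without any RestartConfig reports
// every safeguard as enabled. Illustrative only.
func exampleRestartDefaults() {
	c := &Cluster{}
	fmt.Println(c.IsUsingMaintenanceModeHooks()) // true
	fmt.Println(c.IsUsingReadinessProbe())       // true
	fmt.Println(c.IsUsingClusterHealthCheck())   // true
}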

// ClusterStatus

// IsRestarting tells if the cluster is restarting due to a change in configuration or an upgrade in progress
31 changes: 31 additions & 0 deletions src/go/k8s/apis/redpanda/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

The hunk below is from the generated Cluster CRD manifest:

@@ -652,11 +652,26 @@ spec:
              description: RestartConfig allows to control the behavior of the cluster
                when restarting
              properties:
                disableClusterHealthCheck:
                  description: DisableClusterHealthCheck deactivates the wait for
                    cluster health when restarting
                  type: boolean
                disableMaintenanceModeHooks:
                  description: DisableMaintenanceModeHooks deactivates the preStop
                    and postStart hooks that force nodes to enter maintenance mode
                    when stopping and exit maintenance mode when up again
                  type: boolean
                disableReadinessProbe:
                  description: DisableReadinessProbe deactivates the readiness probe
                    that verifies the state of each node by querying the Redpanda
                    admin API
                  type: boolean
                healthCheckTimeoutSeconds:
                  description: HealthCheckTimeoutSeconds configures the maximum
                    time to wait for the cluster to become healthy before giving
                    up
                  format: int32
                  type: integer
              type: object
            sidecars:
              description: Sidecars is list of sidecars run alongside redpanda container
@@ -800,6 +815,7 @@ spec:
                description: Type is the type of the condition
                enum:
                - ClusterConfigured
                - ClusterStable
                type: string
              required:
              - status
2 changes: 2 additions & 0 deletions src/go/k8s/config/rbac/role.yaml
@@ -59,6 +59,8 @@ rules:
  - delete
  - get
  - list
  - patch
  - update
  - watch
- apiGroups:
  - ""
61 changes: 59 additions & 2 deletions src/go/k8s/controllers/redpanda/cluster_controller.go
@@ -24,11 +24,13 @@ import (
"github.com/redpanda-data/redpanda/src/go/k8s/pkg/networking"
"github.com/redpanda-data/redpanda/src/go/k8s/pkg/resources"
"github.com/redpanda-data/redpanda/src/go/k8s/pkg/resources/certmanager"
"github.com/redpanda-data/redpanda/src/go/k8s/pkg/utils"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/api/admin"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/retry"
@@ -58,7 +60,7 @@ type ClusterReconciler struct {
//+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;
//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;
//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;delete
//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;
//+kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;update;patch;
//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles;clusterrolebindings,verbs=get;list;watch;create;update;patch;
@@ -143,6 +145,8 @@ func (r *ClusterReconciler) Reconcile(
sa := resources.NewServiceAccount(r.Client, &redpandaCluster, r.Scheme, log)
configMapResource := resources.NewConfigMap(r.Client, &redpandaCluster, r.Scheme, headlessSvc.HeadlessServiceFQDN(r.clusterDomain), proxySuKey, schemaRegistrySuKey, log)

hooks := resources.NewHooksConfigMap(r.Client, &redpandaCluster, r.Scheme, log)

sts := resources.NewStatefulSet(
r.Client,
&redpandaCluster,
@@ -171,6 +175,7 @@
resources.NewClusterRole(r.Client, &redpandaCluster, r.Scheme, log),
crb,
resources.NewPDB(r.Client, &redpandaCluster, r.Scheme, log),
hooks,
sts,
}

@@ -312,7 +317,10 @@ func (r *ClusterReconciler) reportStatus(
nodeList.Internal = observedNodesInternal
nodeList.SchemaRegistry.Internal = fmt.Sprintf("%s:%d", clusterFQDN, schemaRegistryPort)

if statusShouldBeUpdated(&redpandaCluster.Status, nodeList, sts) {
stableCondition := computeStableCondition(redpandaCluster, observedPods.Items)
conditionChanged := redpandaCluster.Status.SetCondition(stableCondition.Type, stableCondition.Status, stableCondition.Reason, stableCondition.Message)

if conditionChanged || statusShouldBeUpdated(&redpandaCluster.Status, nodeList, sts) {
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
var cluster redpandav1alpha1.Cluster
err := r.Get(ctx, types.NamespacedName{
@@ -326,6 +334,7 @@
cluster.Status.Nodes = *nodeList
cluster.Status.Replicas = sts.LastObservedState.Status.ReadyReplicas
cluster.Status.Version = sts.Version()
cluster.Status.SetCondition(stableCondition.Type, stableCondition.Status, stableCondition.Reason, stableCondition.Message)

err = r.Status().Update(ctx, &cluster)
if err == nil {
@@ -341,6 +350,54 @@
return nil
}

// computeStableCondition derives the ClusterStable condition from the number of ready pods:
// it becomes true only once all requested replicas are ready, stays true while at least a
// quorum remains ready, and otherwise reports Recovering (quorum reached, not yet fully
// available) or NotEnoughInstances (below quorum).
func computeStableCondition(
	cluster *redpandav1alpha1.Cluster, observedPods []corev1.Pod,
) redpandav1alpha1.ClusterCondition {
	var requestedInstances int
	if cluster.Spec.Replicas != nil {
		requestedInstances = int(*cluster.Spec.Replicas)
	} else {
		requestedInstances = 1
	}

	quorum := (requestedInstances / 2) + 1

	readyPods := 0
	for i := range observedPods {
		if utils.IsPodReady(&observedPods[i]) {
			readyPods++
		}
	}

	isStable := readyPods >= quorum
	isFullyAvailable := readyPods >= requestedInstances
	wasStable := cluster.Status.GetConditionStatus(redpandav1alpha1.ClusterStableConditionType) == corev1.ConditionTrue
	if (wasStable && isStable) || (!wasStable && isFullyAvailable) {
		return redpandav1alpha1.ClusterCondition{
			Type:               redpandav1alpha1.ClusterStableConditionType,
			Status:             corev1.ConditionTrue,
			LastTransitionTime: metav1.Time{},
			Reason:             "",
			Message:            "",
		}
	} else if !wasStable && isStable {
		return redpandav1alpha1.ClusterCondition{
			Type:               redpandav1alpha1.ClusterStableConditionType,
			Status:             corev1.ConditionFalse,
			LastTransitionTime: metav1.Time{},
			Reason:             redpandav1alpha1.ClusterStableRecovering,
			Message:            fmt.Sprintf("Currently %d out of %d instances ready", readyPods, requestedInstances),
		}
	}
	return redpandav1alpha1.ClusterCondition{
		Type:               redpandav1alpha1.ClusterStableConditionType,
		Status:             corev1.ConditionFalse,
		LastTransitionTime: metav1.Time{},
		Reason:             redpandav1alpha1.ClusterStableNotEnoughInstances,
		Message:            fmt.Sprintf("Needed %d to reach quorum but only %d are ready", quorum, readyPods),
	}
}
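To make the quorum arithmetic concrete, here is a sketch of a test covering the three outcomes (hypothetical, not part of the PR; it assumes Spec.Replicas is an *int32, that the test sits in the same package with the usual testing, corev1, and redpandav1alpha1 imports, and that utils.IsPodReady keys off the standard PodReady condition):

// readyPod builds a minimal pod that should count as ready (assumption about utils.IsPodReady).
func readyPod() corev1.Pod {
	return corev1.Pod{Status: corev1.PodStatus{Conditions: []corev1.PodCondition{
		{Type: corev1.PodReady, Status: corev1.ConditionTrue},
	}}}
}

func TestComputeStableCondition_Quorum(t *testing.T) {
	replicas := int32(3)
	cluster := &redpandav1alpha1.Cluster{Spec: redpandav1alpha1.ClusterSpec{Replicas: &replicas}}

	// 1 of 3 ready: below the quorum of 2, so the condition is false with reason NotEnoughInstances.
	if c := computeStableCondition(cluster, []corev1.Pod{readyPod()}); c.Reason != redpandav1alpha1.ClusterStableNotEnoughInstances {
		t.Fatalf("expected NotEnoughInstances, got %q", c.Reason)
	}

	// 2 of 3 ready while previously unstable: quorum reached but not full availability, so Recovering.
	if c := computeStableCondition(cluster, []corev1.Pod{readyPod(), readyPod()}); c.Reason != redpandav1alpha1.ClusterStableRecovering {
		t.Fatalf("expected Recovering, got %q", c.Reason)
	}

	// 3 of 3 ready: full availability flips ClusterStable to true.
	if c := computeStableCondition(cluster, []corev1.Pod{readyPod(), readyPod(), readyPod()}); c.Status != corev1.ConditionTrue {
		t.Fatalf("expected ClusterStable=True, got %v", c.Status)
	}
}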

func statusShouldBeUpdated(
status *redpandav1alpha1.ClusterStatus,
nodeList *redpandav1alpha1.NodesList,