diff --git a/src/go/k8s/controllers/redpanda/suite_test.go b/src/go/k8s/controllers/redpanda/suite_test.go index c5960fe078ef0..4c3751a31e2f6 100644 --- a/src/go/k8s/controllers/redpanda/suite_test.go +++ b/src/go/k8s/controllers/redpanda/suite_test.go @@ -529,6 +529,12 @@ func (m *mockAdminAPI) DisableMaintenanceMode(_ context.Context, _ int) error { return nil } +func (m *mockAdminAPI) GetHealthOverview(_ context.Context) (admin.ClusterHealthOverview, error) { + return admin.ClusterHealthOverview{ + IsHealthy: true, + }, nil +} + //nolint:goerr113 // test code func (m *mockAdminAPI) SetBrokerStatus( id int, status admin.MembershipStatus, diff --git a/src/go/k8s/pkg/admin/admin.go b/src/go/k8s/pkg/admin/admin.go index 728b8db67ca4c..4962683bcddda 100644 --- a/src/go/k8s/pkg/admin/admin.go +++ b/src/go/k8s/pkg/admin/admin.go @@ -97,6 +97,8 @@ type AdminAPIClient interface { EnableMaintenanceMode(ctx context.Context, node int) error DisableMaintenanceMode(ctx context.Context, node int) error + + GetHealthOverview(ctx context.Context) (admin.ClusterHealthOverview, error) } var _ AdminAPIClient = &admin.AdminAPI{} diff --git a/src/go/k8s/pkg/resources/statefulset_update.go b/src/go/k8s/pkg/resources/statefulset_update.go index 8200463031d70..dfbc22168856e 100644 --- a/src/go/k8s/pkg/resources/statefulset_update.go +++ b/src/go/k8s/pkg/resources/statefulset_update.go @@ -82,6 +82,10 @@ func (r *StatefulSetResource) runUpdate( return err } + if err = r.isClusterHealthy(ctx); err != nil { + return err + } + if err = r.rollingUpdate(ctx, &modified.Spec.Template); err != nil { return err } @@ -94,6 +98,32 @@ func (r *StatefulSetResource) runUpdate( return nil } +func (r *StatefulSetResource) isClusterHealthy(ctx context.Context) error { + adminAPIClient, err := r.getAdminAPIClient(ctx) + if err != nil { + return fmt.Errorf("creating admin API client: %w", err) + } + + health, err := adminAPIClient.GetHealthOverview(ctx) + if err != nil { + return fmt.Errorf("getting cluster health overview: %w", err) + } + + restarting := "not restarting" + if r.pandaCluster.Status.IsRestarting() { + restarting = "restarting" + } + + if !health.IsHealthy { + return &RequeueAfterError{ + RequeueAfter: RequeueDuration, + Msg: fmt.Sprintf("wait for cluster to become healthy (cluster %s)", restarting), + } + } + + return nil +} + func (r *StatefulSetResource) rollingUpdate( ctx context.Context, template *corev1.PodTemplateSpec, ) error {