Skip to content

Commit

Permalink
Get cluster health before an update
Browse files Browse the repository at this point in the history
As per redpanda-data#3023, the cluster should
be healthy before a node is put into maintenance mode and after the pod is
restarted.
  • Loading branch information
Rafal Korepta committed Nov 28, 2022
1 parent 3a0df25 commit dc393e8
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/go/k8s/controllers/redpanda/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,12 @@ func (m *mockAdminAPI) DisableMaintenanceMode(_ context.Context, _ int) error {
return nil
}

// GetHealthOverview is the mock's stand-in for the admin API health query.
// It ignores the context and always reports a healthy cluster with a nil error.
func (m *mockAdminAPI) GetHealthOverview(_ context.Context) (admin.ClusterHealthOverview, error) {
	var overview admin.ClusterHealthOverview
	overview.IsHealthy = true
	return overview, nil
}

//nolint:goerr113 // test code
func (m *mockAdminAPI) SetBrokerStatus(
id int, status admin.MembershipStatus,
Expand Down
2 changes: 2 additions & 0 deletions src/go/k8s/pkg/admin/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ type AdminAPIClient interface {

EnableMaintenanceMode(ctx context.Context, node int) error
DisableMaintenanceMode(ctx context.Context, node int) error

GetHealthOverview(ctx context.Context) (admin.ClusterHealthOverview, error)
}

// Compile-time check that *admin.AdminAPI satisfies AdminAPIClient.
var _ AdminAPIClient = (*admin.AdminAPI)(nil)
Expand Down
30 changes: 30 additions & 0 deletions src/go/k8s/pkg/resources/statefulset_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ func (r *StatefulSetResource) runUpdate(
return err
}

if err = r.isClusterHealthy(ctx); err != nil {
return err
}

if err = r.rollingUpdate(ctx, &modified.Spec.Template); err != nil {
return err
}
Expand All @@ -94,6 +98,32 @@ func (r *StatefulSetResource) runUpdate(
return nil
}

// isClusterHealthy asks the admin API for the cluster health overview and
// reports whether an update may proceed.
//
// It returns nil when the overview says the cluster is healthy. Failures to
// build the admin API client or to fetch the overview are returned wrapped
// with context. When the cluster is not healthy it returns a
// *RequeueAfterError carrying RequeueDuration, with a message that also notes
// whether the cluster status is currently marked as restarting.
func (r *StatefulSetResource) isClusterHealthy(ctx context.Context) error {
	client, err := r.getAdminAPIClient(ctx)
	if err != nil {
		return fmt.Errorf("creating admin API client: %w", err)
	}

	overview, err := client.GetHealthOverview(ctx)
	if err != nil {
		return fmt.Errorf("getting cluster health overview: %w", err)
	}

	// The restart state is only used to enrich the requeue message below.
	state := "not restarting"
	if r.pandaCluster.Status.IsRestarting() {
		state = "restarting"
	}

	if overview.IsHealthy {
		return nil
	}

	return &RequeueAfterError{
		RequeueAfter: RequeueDuration,
		Msg:          fmt.Sprintf("wait for cluster to become healthy (cluster %s)", state),
	}
}

func (r *StatefulSetResource) rollingUpdate(
ctx context.Context, template *corev1.PodTemplateSpec,
) error {
Expand Down

0 comments on commit dc393e8

Please sign in to comment.