From ad6643552b34abcb589e062788176c03b0252ac0 Mon Sep 17 00:00:00 2001 From: nicolaferraro Date: Fri, 10 Jun 2022 12:23:21 +0200 Subject: [PATCH] operator: fix maintenance mode activation on decommissioning node (workaround for #4999) --- src/go/k8s/controllers/redpanda/suite_test.go | 8 +++ src/go/k8s/pkg/admin/admin.go | 3 ++ src/go/k8s/pkg/resources/statefulset.go | 6 +++ src/go/k8s/pkg/resources/statefulset_scale.go | 53 +++++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/src/go/k8s/controllers/redpanda/suite_test.go b/src/go/k8s/controllers/redpanda/suite_test.go index e60b73c3497ed..29e578a11b8dd 100644 --- a/src/go/k8s/controllers/redpanda/suite_test.go +++ b/src/go/k8s/controllers/redpanda/suite_test.go @@ -466,6 +466,14 @@ func (m *mockAdminAPI) RecommissionBroker(_ context.Context, id int) error { return m.SetBrokerStatus(id, admin.MembershipStatusActive) } +func (m *mockAdminAPI) EnableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + +func (m *mockAdminAPI) DisableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + // nolint:goerr113 // test code func (m *mockAdminAPI) SetBrokerStatus( id int, status admin.MembershipStatus, diff --git a/src/go/k8s/pkg/admin/admin.go b/src/go/k8s/pkg/admin/admin.go index 635497191607d..98ae54b98df29 100644 --- a/src/go/k8s/pkg/admin/admin.go +++ b/src/go/k8s/pkg/admin/admin.go @@ -90,6 +90,9 @@ type AdminAPIClient interface { Brokers(ctx context.Context) ([]admin.Broker, error) DecommissionBroker(ctx context.Context, node int) error RecommissionBroker(ctx context.Context, node int) error + + EnableMaintenanceMode(ctx context.Context, node int) error + DisableMaintenanceMode(ctx context.Context, node int) error } var _ AdminAPIClient = &admin.AdminAPI{} diff --git a/src/go/k8s/pkg/resources/statefulset.go b/src/go/k8s/pkg/resources/statefulset.go index 09e3922c78444..1aa9d6aa79c82 100644 --- a/src/go/k8s/pkg/resources/statefulset.go +++ 
b/src/go/k8s/pkg/resources/statefulset.go @@ -176,6 +176,12 @@ func (r *StatefulSetResource) Ensure(ctx context.Context) error { } r.LastObservedState = &sts + // Hack for: https://github.com/redpanda-data/redpanda/issues/4999 + err = r.disableMaintenanceModeOnDecommissionedNodes(ctx) + if err != nil { + return err + } + r.logger.Info("Running update", "resource name", r.Key().Name) err = r.runUpdate(ctx, &sts, obj.(*appsv1.StatefulSet)) if err != nil { diff --git a/src/go/k8s/pkg/resources/statefulset_scale.go b/src/go/k8s/pkg/resources/statefulset_scale.go index d315f15566196..bd55db3cb1e69 100644 --- a/src/go/k8s/pkg/resources/statefulset_scale.go +++ b/src/go/k8s/pkg/resources/statefulset_scale.go @@ -11,12 +11,14 @@ package resources import ( "context" + "errors" "fmt" "github.com/go-logr/logr" redpandav1alpha1 "github.com/redpanda-data/redpanda/src/go/k8s/apis/redpanda/v1alpha1" adminutils "github.com/redpanda-data/redpanda/src/go/k8s/pkg/admin" "github.com/redpanda-data/redpanda/src/go/k8s/pkg/labels" + "github.com/redpanda-data/redpanda/src/go/k8s/pkg/resources/featuregates" "github.com/redpanda-data/redpanda/src/go/rpk/pkg/api/admin" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -261,6 +263,56 @@ func (r *StatefulSetResource) isClusterFormed( return len(brokers) > 0, nil } +// disableMaintenanceModeOnDecommissionedNodes can be used to put a cluster in a consistent state, disabling maintenance mode on +// nodes that have been decommissioned. +// +// A decommissioned node may activate maintenance mode via shutdown hooks and the cluster may enter an inconsistent state, +// preventing other pods' clean shutdown. 
+// +// See: https://github.com/redpanda-data/redpanda/issues/4999 +func (r *StatefulSetResource) disableMaintenanceModeOnDecommissionedNodes( + ctx context.Context, +) error { + if !featuregates.MaintenanceMode(r.pandaCluster.Status.Version) { + return nil + } + + if r.pandaCluster.Status.DecommissioningNode == nil || r.pandaCluster.Status.DesiredReplicas > *r.pandaCluster.Status.DecommissioningNode { + // Only if actually in a decommissioning phase + return nil + } + + ordinal := *r.pandaCluster.Status.DecommissioningNode + targetReplicas := ordinal + + scaledDown, err := r.verifyRunningCount(ctx, targetReplicas) + if err != nil || !scaledDown { + // This should be done only when the pod disappears from the cluster + return err + } + + adminAPI, err := r.getAdminAPIClient(ctx) + if err != nil { + return err + } + + r.logger.Info("Forcing deletion of maintenance mode for the decommissioned node", "node_id", ordinal) + err = adminAPI.DisableMaintenanceMode(ctx, int(ordinal)) + if err != nil { + var httpErr *admin.HTTPResponseError + if errors.As(err, &httpErr) { + if httpErr.Response != nil && httpErr.Response.StatusCode/100 == 4 { + // Cluster says we don't need to do it + r.logger.Info("No need to disable maintenance mode on the decommissioned node", "node_id", ordinal, "status_code", httpErr.Response.StatusCode) + return nil + } + } + return fmt.Errorf("could not disable maintenance mode on decommissioning node %d: %w", ordinal, err) + } + r.logger.Info("Maintenance mode disabled for the decommissioned node", "node_id", ordinal) + return nil +} + // verifyRunningCount checks if the statefulset is configured to run the given amount of replicas and that also pods match the expectations func (r *StatefulSetResource) verifyRunningCount( ctx context.Context, replicas int32, @@ -281,6 +333,7 @@ func (r *StatefulSetResource) verifyRunningCount( if err != nil { return false, fmt.Errorf("could not list pods for checking replicas: %w", err) } + return len(podList.Items) 
== int(replicas), nil }