From a99e31067a789ae1342c0c89cc061a746ede6663 Mon Sep 17 00:00:00 2001 From: nicolaferraro Date: Fri, 10 Jun 2022 12:23:21 +0200 Subject: [PATCH] operator: fix maintenance mode activation on decommissioning node (workaround for #4999) When a node is shut down after decommission, the maintenance mode hooks will trigger. While the process has no visible effect on partitions, it leaves the cluster in an inconsistent state, so that other nodes cannot enter maintenance mode. We force reset the flag with this change. --- src/go/k8s/controllers/redpanda/suite_test.go | 8 +++ src/go/k8s/pkg/admin/admin.go | 3 ++ src/go/k8s/pkg/resources/statefulset.go | 6 +++ src/go/k8s/pkg/resources/statefulset_scale.go | 53 +++++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/src/go/k8s/controllers/redpanda/suite_test.go b/src/go/k8s/controllers/redpanda/suite_test.go index e60b73c3497ed..29e578a11b8dd 100644 --- a/src/go/k8s/controllers/redpanda/suite_test.go +++ b/src/go/k8s/controllers/redpanda/suite_test.go @@ -466,6 +466,14 @@ func (m *mockAdminAPI) RecommissionBroker(_ context.Context, id int) error { return m.SetBrokerStatus(id, admin.MembershipStatusActive) } +func (m *mockAdminAPI) EnableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + +func (m *mockAdminAPI) DisableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + // nolint:goerr113 // test code func (m *mockAdminAPI) SetBrokerStatus( id int, status admin.MembershipStatus, diff --git a/src/go/k8s/pkg/admin/admin.go b/src/go/k8s/pkg/admin/admin.go index 8b2a7ccfb154c..d5bf771c85e2f 100644 --- a/src/go/k8s/pkg/admin/admin.go +++ b/src/go/k8s/pkg/admin/admin.go @@ -90,6 +90,9 @@ type AdminAPIClient interface { Brokers(ctx context.Context) ([]admin.Broker, error) DecommissionBroker(ctx context.Context, node int) error RecommissionBroker(ctx context.Context, node int) error + + EnableMaintenanceMode(ctx context.Context, node int) error + DisableMaintenanceMode(ctx context.Context, 
node int) error } var _ AdminAPIClient = &admin.AdminAPI{} diff --git a/src/go/k8s/pkg/resources/statefulset.go b/src/go/k8s/pkg/resources/statefulset.go index b04347c72825b..1a1309dd0e603 100644 --- a/src/go/k8s/pkg/resources/statefulset.go +++ b/src/go/k8s/pkg/resources/statefulset.go @@ -176,6 +176,12 @@ func (r *StatefulSetResource) Ensure(ctx context.Context) error { } r.LastObservedState = &sts + // Hack for: https://github.com/redpanda-data/redpanda/issues/4999 + err = r.disableMaintenanceModeOnDecommissionedNodes(ctx) + if err != nil { + return err + } + r.logger.Info("Running update", "resource name", r.Key().Name) err = r.runUpdate(ctx, &sts, obj.(*appsv1.StatefulSet)) if err != nil { diff --git a/src/go/k8s/pkg/resources/statefulset_scale.go b/src/go/k8s/pkg/resources/statefulset_scale.go index 0f62b324f2ed7..7aec882d08a8a 100644 --- a/src/go/k8s/pkg/resources/statefulset_scale.go +++ b/src/go/k8s/pkg/resources/statefulset_scale.go @@ -11,12 +11,14 @@ package resources import ( "context" + "errors" "fmt" "github.com/go-logr/logr" redpandav1alpha1 "github.com/redpanda-data/redpanda/src/go/k8s/apis/redpanda/v1alpha1" adminutils "github.com/redpanda-data/redpanda/src/go/k8s/pkg/admin" "github.com/redpanda-data/redpanda/src/go/k8s/pkg/labels" + "github.com/redpanda-data/redpanda/src/go/k8s/pkg/resources/featuregates" "github.com/redpanda-data/redpanda/src/go/rpk/pkg/api/admin" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" ) @@ -261,6 +263,56 @@ func (r *StatefulSetResource) isClusterFormed( return len(brokers) > 0, nil } +// disableMaintenanceModeOnDecommissionedNodes can be used to put a cluster in a consistent state, disabling maintenance mode on +// nodes that have been decommissioned. +// +// A decommissioned node may activate maintenance mode via shutdown hooks and the cluster may enter an inconsistent state, +// preventing other pods' clean shutdown. 
+// +// See: https://github.com/redpanda-data/redpanda/issues/4999 +func (r *StatefulSetResource) disableMaintenanceModeOnDecommissionedNodes( + ctx context.Context, +) error { + if !featuregates.MaintenanceMode(r.pandaCluster.Status.Version) { + return nil + } + + if r.pandaCluster.Status.DecommissioningNode == nil || r.pandaCluster.Status.CurrentReplicas > *r.pandaCluster.Status.DecommissioningNode { + // Only if actually in a decommissioning phase + return nil + } + + ordinal := *r.pandaCluster.Status.DecommissioningNode + targetReplicas := ordinal + + scaledDown, err := r.verifyRunningCount(ctx, targetReplicas) + if err != nil || !scaledDown { + // This should be done only when the pod disappears from the cluster + return err + } + + adminAPI, err := r.getAdminAPIClient(ctx) + if err != nil { + return err + } + + r.logger.Info("Forcing deletion of maintenance mode for the decommissioned node", "node_id", ordinal) + err = adminAPI.DisableMaintenanceMode(ctx, int(ordinal)) + if err != nil { + var httpErr *admin.HTTPResponseError + if errors.As(err, &httpErr) { + if httpErr.Response != nil && httpErr.Response.StatusCode/100 == 4 { + // Cluster says we don't need to do it + r.logger.Info("No need to disable maintenance mode on the decommissioned node", "node_id", ordinal, "status_code", httpErr.Response.StatusCode) + return nil + } + } + return fmt.Errorf("could not disable maintenance mode on decommissioning node %d: %w", ordinal, err) + } + r.logger.Info("Maintenance mode disabled for the decommissioned node", "node_id", ordinal) + return nil +} + // verifyRunningCount checks if the statefulset is configured to run the given amount of replicas and that also pods match the expectations func (r *StatefulSetResource) verifyRunningCount( ctx context.Context, replicas int32, @@ -281,6 +333,7 @@ func (r *StatefulSetResource) verifyRunningCount( if err != nil { return false, fmt.Errorf("could not list pods for checking replicas: %w", err) } + return len(podList.Items) 
== int(replicas), nil }