From 9db0350c4bb29d2bf6911c2e700d8424b41b4dc1 Mon Sep 17 00:00:00 2001 From: nicolaferraro Date: Fri, 10 Jun 2022 12:23:21 +0200 Subject: [PATCH] operator: fix maintenance mode activation on decommissioning node (workaround for #4999) When a node is shut down after decommission, the maintenance mode hooks will trigger. While the process has no visible effect on partitions, it leaves the cluster in an inconsistent state, so that other nodes cannot enter maintenance mode. We force reset the flag with this change. --- controllers/redpanda/suite_test.go | 8 +++++ pkg/admin/admin.go | 3 ++ pkg/resources/statefulset.go | 6 ++++ pkg/resources/statefulset_scale.go | 53 ++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/controllers/redpanda/suite_test.go b/controllers/redpanda/suite_test.go index 57bd1250b56b..bce30a3e193d 100644 --- a/controllers/redpanda/suite_test.go +++ b/controllers/redpanda/suite_test.go @@ -466,6 +466,14 @@ func (m *mockAdminAPI) RecommissionBroker(_ context.Context, id int) error { return m.SetBrokerStatus(id, admin.MembershipStatusActive) } +func (m *mockAdminAPI) EnableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + +func (m *mockAdminAPI) DisableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + // nolint:goerr113 // test code func (m *mockAdminAPI) SetBrokerStatus( id int, status admin.MembershipStatus, diff --git a/pkg/admin/admin.go b/pkg/admin/admin.go index 1512dc6e25a5..2a37d8a5ecce 100644 --- a/pkg/admin/admin.go +++ b/pkg/admin/admin.go @@ -90,6 +90,9 @@ type AdminAPIClient interface { Brokers(ctx context.Context) ([]admin.Broker, error) DecommissionBroker(ctx context.Context, node int) error RecommissionBroker(ctx context.Context, node int) error + + EnableMaintenanceMode(ctx context.Context, node int) error + DisableMaintenanceMode(ctx context.Context, node int) error } var _ AdminAPIClient = &admin.AdminAPI{} diff --git a/pkg/resources/statefulset.go 
b/pkg/resources/statefulset.go index b04347c72825..1a1309dd0e60 100644 --- a/pkg/resources/statefulset.go +++ b/pkg/resources/statefulset.go @@ -176,6 +176,12 @@ func (r *StatefulSetResource) Ensure(ctx context.Context) error { } r.LastObservedState = &sts + // Hack for: https://github.com/redpanda-data/redpanda/issues/4999 + err = r.disableMaintenanceModeOnDecommissionedNodes(ctx) + if err != nil { + return err + } + r.logger.Info("Running update", "resource name", r.Key().Name) err = r.runUpdate(ctx, &sts, obj.(*appsv1.StatefulSet)) if err != nil { diff --git a/pkg/resources/statefulset_scale.go b/pkg/resources/statefulset_scale.go index 81d6285e2149..c76927b8b403 100644 --- a/pkg/resources/statefulset_scale.go +++ b/pkg/resources/statefulset_scale.go @@ -11,12 +11,14 @@ package resources import ( "context" + "errors" "fmt" "github.com/go-logr/logr" redpandav1alpha1 "github.com/redpanda-data/redpanda/src/go/k8s/apis/redpanda/v1alpha1" adminutils "github.com/redpanda-data/redpanda/src/go/k8s/pkg/admin" "github.com/redpanda-data/redpanda/src/go/k8s/pkg/labels" + "github.com/redpanda-data/redpanda/src/go/k8s/pkg/resources/featuregates" "github.com/redpanda-data/redpanda/src/go/rpk/pkg/api/admin" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -261,6 +263,56 @@ func (r *StatefulSetResource) isClusterFormed( return len(brokers) > 0, nil } +// disableMaintenanceModeOnDecommissionedNodes can be used to put a cluster in a consistent state, disabling maintenance mode on +// nodes that have been decommissioned. +// +// A decommissioned node may activate maintenance mode via shutdown hooks and the cluster may enter an inconsistent state, +// preventing other pods clean shutdown. 
+// +// See: https://github.com/redpanda-data/redpanda/issues/4999 +func (r *StatefulSetResource) disableMaintenanceModeOnDecommissionedNodes( + ctx context.Context, +) error { + if !featuregates.MaintenanceMode(r.pandaCluster.Status.Version) { + return nil + } + + if r.pandaCluster.Status.DecommissioningNode == nil || r.pandaCluster.Status.CurrentReplicas > *r.pandaCluster.Status.DecommissioningNode { + // Only if actually in a decommissioning phase + return nil + } + + ordinal := *r.pandaCluster.Status.DecommissioningNode + targetReplicas := ordinal + + scaledDown, err := r.verifyRunningCount(ctx, targetReplicas) + if err != nil || !scaledDown { + // This should be done only when the pod disappears from the cluster + return err + } + + adminAPI, err := r.getAdminAPIClient(ctx) + if err != nil { + return err + } + + r.logger.Info("Forcing deletion of maintenance mode for the decommissioned node", "node_id", ordinal) + err = adminAPI.DisableMaintenanceMode(ctx, int(ordinal)) + if err != nil { + var httpErr *admin.HTTPResponseError + if errors.As(err, &httpErr) { + if httpErr.Response != nil && httpErr.Response.StatusCode/100 == 4 { + // Cluster says we don't need to do it + r.logger.Info("No need to disable maintenance mode on the decommissioned node", "node_id", ordinal, "status_code", httpErr.Response.StatusCode) + return nil + } + } + return fmt.Errorf("could not disable maintenance mode on decommissioning node %d: %w", ordinal, err) + } + r.logger.Info("Maintenance mode disabled for the decommissioned node", "node_id", ordinal) + return nil +} + // verifyRunningCount checks if the statefulset is configured to run the given amount of replicas and that also pods match the expectations func (r *StatefulSetResource) verifyRunningCount( ctx context.Context, replicas int32, @@ -281,6 +333,7 @@ func (r *StatefulSetResource) verifyRunningCount( if err != nil { return false, fmt.Errorf("could not list pods for checking replicas: %w", err) } + return len(podList.Items) 
== int(replicas), nil }