From ad6643552b34abcb589e062788176c03b0252ac0 Mon Sep 17 00:00:00 2001 From: nicolaferraro Date: Fri, 10 Jun 2022 12:23:21 +0200 Subject: [PATCH] operator: fix maintenance mode activation on decommissioning node (workaround for #4999) --- src/go/k8s/controllers/redpanda/suite_test.go | 8 +++ src/go/k8s/pkg/admin/admin.go | 3 ++ src/go/k8s/pkg/resources/statefulset.go | 6 +++ src/go/k8s/pkg/resources/statefulset_scale.go | 53 +++++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/src/go/k8s/controllers/redpanda/suite_test.go b/src/go/k8s/controllers/redpanda/suite_test.go index e60b73c3497ed..29e578a11b8dd 100644 --- a/src/go/k8s/controllers/redpanda/suite_test.go +++ b/src/go/k8s/controllers/redpanda/suite_test.go @@ -466,6 +466,14 @@ func (m *mockAdminAPI) RecommissionBroker(_ context.Context, id int) error { return m.SetBrokerStatus(id, admin.MembershipStatusActive) } +func (m *mockAdminAPI) EnableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + +func (m *mockAdminAPI) DisableMaintenanceMode(_ context.Context, _ int) error { + return nil +} + // nolint:goerr113 // test code func (m *mockAdminAPI) SetBrokerStatus( id int, status admin.MembershipStatus, diff --git a/src/go/k8s/pkg/admin/admin.go b/src/go/k8s/pkg/admin/admin.go index 635497191607d..98ae54b98df29 100644 --- a/src/go/k8s/pkg/admin/admin.go +++ b/src/go/k8s/pkg/admin/admin.go @@ -90,6 +90,9 @@ type AdminAPIClient interface { Brokers(ctx context.Context) ([]admin.Broker, error) DecommissionBroker(ctx context.Context, node int) error RecommissionBroker(ctx context.Context, node int) error + + EnableMaintenanceMode(ctx context.Context, node int) error + DisableMaintenanceMode(ctx context.Context, node int) error } var _ AdminAPIClient = &admin.AdminAPI{} diff --git a/src/go/k8s/pkg/resources/statefulset.go b/src/go/k8s/pkg/resources/statefulset.go index 09e3922c78444..1aa9d6aa79c82 100644 --- a/src/go/k8s/pkg/resources/statefulset.go +++ 
b/src/go/k8s/pkg/resources/statefulset.go @@ -176,6 +176,12 @@ func (r *StatefulSetResource) Ensure(ctx context.Context) error { } r.LastObservedState = &sts + // Hack for: https://github.com/redpanda-data/redpanda/issues/4999 + err = r.disableMaintenanceModeOnDecommissionedNodes(ctx) + if err != nil { + return err + } + r.logger.Info("Running update", "resource name", r.Key().Name) err = r.runUpdate(ctx, &sts, obj.(*appsv1.StatefulSet)) if err != nil { diff --git a/src/go/k8s/pkg/resources/statefulset_scale.go b/src/go/k8s/pkg/resources/statefulset_scale.go index d315f15566196..bd55db3cb1e69 100644 --- a/src/go/k8s/pkg/resources/statefulset_scale.go +++ b/src/go/k8s/pkg/resources/statefulset_scale.go @@ -11,12 +11,14 @@ package resources import ( "context" + "errors" "fmt" "github.com/go-logr/logr" redpandav1alpha1 "github.com/redpanda-data/redpanda/src/go/k8s/apis/redpanda/v1alpha1" adminutils "github.com/redpanda-data/redpanda/src/go/k8s/pkg/admin" "github.com/redpanda-data/redpanda/src/go/k8s/pkg/labels" + "github.com/redpanda-data/redpanda/src/go/k8s/pkg/resources/featuregates" "github.com/redpanda-data/redpanda/src/go/rpk/pkg/api/admin" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -261,6 +263,56 @@ func (r *StatefulSetResource) isClusterFormed( return len(brokers) > 0, nil } +// disableMaintenanceModeOnDecommissionedNodes can be used to put a cluster in a consistent state, disabling maintenance mode on +// nodes that have been decommissioned. +// +// A decommissioned node may activate maintenance mode via shutdown hooks and the cluster may enter an inconsistent state, +// preventing other pods' clean shutdown. 
+// +// See: https://github.com/redpanda-data/redpanda/issues/4999 +func (r *StatefulSetResource) disableMaintenanceModeOnDecommissionedNodes( + ctx context.Context, +) error { + if !featuregates.MaintenanceMode(r.pandaCluster.Status.Version) { + return nil + } + + if r.pandaCluster.Status.DecommissioningNode == nil || r.pandaCluster.Status.DesiredReplicas > *r.pandaCluster.Status.DecommissioningNode { + // Only if actually in a decommissioning phase + return nil + } + + ordinal := *r.pandaCluster.Status.DecommissioningNode + targetReplicas := ordinal + + scaledDown, err := r.verifyRunningCount(ctx, targetReplicas) + if err != nil || !scaledDown { + // This should be done only when the pod disappears from the cluster + return err + } + + adminAPI, err := r.getAdminAPIClient(ctx) + if err != nil { + return err + } + + r.logger.Info("Forcing deletion of maintenance mode for the decommissioned node", "node_id", ordinal) + err = adminAPI.DisableMaintenanceMode(ctx, int(ordinal)) + if err != nil { + var httpErr *admin.HTTPResponseError + if errors.As(err, &httpErr) { + if httpErr.Response != nil && httpErr.Response.StatusCode/100 == 4 { + // Cluster says we don't need to do it + r.logger.Info("No need to disable maintenance mode on the decommissioned node", "node_id", ordinal, "status_code", httpErr.Response.StatusCode) + return nil + } + } + return fmt.Errorf("could not disable maintenance mode on decommissioning node %d: %w", ordinal, err) + } + r.logger.Info("Maintenance mode disabled for the decommissioned node", "node_id", ordinal) + return nil +} + // verifyRunningCount checks if the statefulset is configured to run the given amount of replicas and that also pods match the expectations func (r *StatefulSetResource) verifyRunningCount( ctx context.Context, replicas int32, @@ -281,6 +333,7 @@ func (r *StatefulSetResource) verifyRunningCount( if err != nil { return false, fmt.Errorf("could not list pods for checking replicas: %w", err) } + return len(podList.Items) 
== int(replicas), nil }