diff --git a/internal/controllers/topology/cluster/desired_state.go b/internal/controllers/topology/cluster/desired_state.go index 4dd951024ec6..a9637f49ed69 100644 --- a/internal/controllers/topology/cluster/desired_state.go +++ b/internal/controllers/topology/cluster/desired_state.go @@ -460,6 +460,7 @@ func (r *Reconciler) computeControlPlaneVersion(ctx context.Context, s *scope.Sc // Control plane and machine deployments are stable. All the required hook are called. // Ready to pick up the topology version. + s.UpgradeTracker.ControlPlane.PendingUpgrade = false return desiredVersion, nil } diff --git a/internal/controllers/topology/cluster/desired_state_test.go b/internal/controllers/topology/cluster/desired_state_test.go index dadad6913644..ebfed2619eaf 100644 --- a/internal/controllers/topology/cluster/desired_state_test.go +++ b/internal/controllers/topology/cluster/desired_state_test.go @@ -27,6 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/sets" utilfeature "k8s.io/component-base/featuregate/testing" "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -916,6 +917,9 @@ func TestComputeControlPlaneVersion(t *testing.T) { } else { g.Expect(err).To(BeNil()) g.Expect(version).To(Equal(tt.expectedVersion)) + // Verify that if the upgrade is pending it is captured in the upgrade tracker. + upgradePending := tt.expectedVersion != tt.topologyVersion + g.Expect(s.UpgradeTracker.ControlPlane.PendingUpgrade).To(Equal(upgradePending)) } }) } @@ -1935,6 +1939,20 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { version, err := computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, desiredControlPlaneState, tt.currentMachineDeploymentState) g.Expect(err).NotTo(HaveOccurred()) g.Expect(version).To(Equal(tt.expectedVersion)) + + // Verify that if the upgrade is pending it is captured in the upgrade tracker. + if tt.currentMachineDeploymentState != nil { + mdName := tt.currentMachineDeploymentState.Object.Name + deferredUpgrade := sets.New[string](s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()...).Has(mdName) + if !deferredUpgrade { + pendingUpgrade := tt.expectedVersion != tt.topologyVersion + if pendingUpgrade { + g.Expect(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()).To(ContainElement(mdName), "MachineDeployment should be marked as pending upgrade") + } else { + g.Expect(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()).NotTo(ContainElement(mdName), "MachineDeployment should not be marked as pending upgrade") + } + } + } }) } } diff --git a/internal/controllers/topology/cluster/reconcile_state.go b/internal/controllers/topology/cluster/reconcile_state.go index 750ae5183386..9d0f1fead5b2 100644 --- a/internal/controllers/topology/cluster/reconcile_state.go +++ b/internal/controllers/topology/cluster/reconcile_state.go @@ -307,6 +307,18 @@ func (r *Reconciler) reconcileInfrastructureCluster(ctx context.Context, s *scop // reconcileControlPlane works to bring the current state of a managed topology in line with the desired state. This involves // updating the cluster where needed. func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) error { + // If the ControlPlane has defined a current or desired MachineHealthCheck attempt to reconcile it. + if s.Desired.ControlPlane.MachineHealthCheck != nil || s.Current.ControlPlane.MachineHealthCheck != nil { + // Reconcile the current and desired state of the MachineHealthCheck. + if err := r.reconcileMachineHealthCheck(ctx, s.Current.ControlPlane.MachineHealthCheck, s.Desired.ControlPlane.MachineHealthCheck); err != nil { + return err + } + } + + // Return early if the control plane reconciliation is on hold. + if s.UpgradeTracker.ControlPlane.HoldReconcile() { + return nil + } // If the clusterClass mandates the controlPlane has infrastructureMachines, reconcile it. if s.Blueprint.HasControlPlaneInfrastructureMachine() { ctx, _ := tlog.LoggerFrom(ctx).WithObject(s.Desired.ControlPlane.InfrastructureMachineTemplate).Into(ctx) @@ -361,13 +373,6 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) } } - // If the ControlPlane has defined a current or desired MachineHealthCheck attempt to reconcile it. - if s.Desired.ControlPlane.MachineHealthCheck != nil || s.Current.ControlPlane.MachineHealthCheck != nil { - // Reconcile the current and desired state of the MachineHealthCheck. - if err := r.reconcileMachineHealthCheck(ctx, s.Current.ControlPlane.MachineHealthCheck, s.Desired.ControlPlane.MachineHealthCheck); err != nil { - return err - } - } return nil } @@ -466,7 +471,10 @@ func (r *Reconciler) reconcileMachineDeployments(ctx context.Context, s *scope.S for _, mdTopologyName := range diff.toUpdate { currentMD := s.Current.MachineDeployments[mdTopologyName] desiredMD := s.Desired.MachineDeployments[mdTopologyName] - if err := r.updateMachineDeployment(ctx, s.Current.Cluster, mdTopologyName, currentMD, desiredMD); err != nil { + if s.UpgradeTracker.MachineDeployments.HoldReconcile(currentMD.Object.Name) { + continue + } + if err := r.updateMachineDeployment(ctx, s, mdTopologyName, currentMD, desiredMD); err != nil { return err } } @@ -522,9 +530,22 @@ func (r *Reconciler) createMachineDeployment(ctx context.Context, cluster *clust } // updateMachineDeployment updates a MachineDeployment. Also rotates the corresponding Templates if necessary. -func (r *Reconciler) updateMachineDeployment(ctx context.Context, cluster *clusterv1.Cluster, mdTopologyName string, currentMD, desiredMD *scope.MachineDeploymentState) error { +func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope, mdTopologyName string, currentMD, desiredMD *scope.MachineDeploymentState) error { log := tlog.LoggerFrom(ctx).WithMachineDeployment(desiredMD.Object) + // Patch MachineHealthCheck for the MachineDeployment. + if desiredMD.MachineHealthCheck != nil || currentMD.MachineHealthCheck != nil { + if err := r.reconcileMachineHealthCheck(ctx, currentMD.MachineHealthCheck, desiredMD.MachineHealthCheck); err != nil { + return err + } + } + + // Return early if the MachineDeployment reconciliation is on hold. + if s.UpgradeTracker.MachineDeployments.HoldReconcile(currentMD.Object.Name) { + return nil + } + + cluster := s.Current.Cluster infraCtx, _ := log.WithObject(desiredMD.InfrastructureMachineTemplate).Into(ctx) if err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{ cluster: cluster, @@ -549,13 +570,6 @@ func (r *Reconciler) updateMachineDeployment(ctx context.Context, cluster *clust return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object}) } - // Patch MachineHealthCheck for the MachineDeployment. - if desiredMD.MachineHealthCheck != nil || currentMD.MachineHealthCheck != nil { - if err := r.reconcileMachineHealthCheck(ctx, currentMD.MachineHealthCheck, desiredMD.MachineHealthCheck); err != nil { - return err - } - } - // Check differences between current and desired MachineDeployment, and eventually patch the current object. log = log.WithObject(desiredMD.Object) patchHelper, err := r.patchHelperFactory(ctx, currentMD.Object, desiredMD.Object) diff --git a/internal/controllers/topology/cluster/reconcile_state_test.go b/internal/controllers/topology/cluster/reconcile_state_test.go index 7feb3e2be1a1..7d29a8eba9be 100644 --- a/internal/controllers/topology/cluster/reconcile_state_test.go +++ b/internal/controllers/topology/cluster/reconcile_state_test.go @@ -1182,12 +1182,16 @@ func TestReconcileControlPlane(t *testing.T) { gvk.Kind = "KindChanged" infrastructureMachineTemplateWithIncompatibleChanges.SetGroupVersionKind(gvk) + upgradeTrackerWithControlPlanePendingUpgrade := scope.NewUpgradeTracker() + upgradeTrackerWithControlPlanePendingUpgrade.ControlPlane.PendingUpgrade = true + tests := []struct { name string class *scope.ControlPlaneBlueprint original *scope.ControlPlaneState controlPlaneExternalChanges string machineInfrastructureExternalChanges string + upgradeTracker *scope.UpgradeTracker desired *scope.ControlPlaneState want *scope.ControlPlaneState wantRotation bool @@ -1210,6 +1214,15 @@ func TestReconcileControlPlane(t *testing.T) { want: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructureWithChanges.DeepCopy()}, wantErr: false, }, + { + name: "Should not update the ControlPlane if ControlPlane is pending upgrade", + class: ccWithoutControlPlaneInfrastructure, + upgradeTracker: upgradeTrackerWithControlPlanePendingUpgrade, + original: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructure.DeepCopy()}, + desired: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructureWithChanges.DeepCopy()}, + want: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructure.DeepCopy()}, + wantErr: false, + }, { name: "Should preserve external changes to ControlPlane without machine infrastructure", class: ccWithoutControlPlaneInfrastructure, @@ -1311,6 +1324,9 @@ func TestReconcileControlPlane(t *testing.T) { Ref: contract.ObjToRef(tt.class.InfrastructureMachineTemplate), } } + if tt.upgradeTracker != nil { + s.UpgradeTracker = tt.upgradeTracker + } s.Current.ControlPlane = &scope.ControlPlaneState{} if tt.original != nil { @@ -1650,6 +1666,8 @@ func TestReconcileMachineDeployments(t *testing.T) { infrastructureMachineTemplate2WithChanges := infrastructureMachineTemplate2.DeepCopy() g.Expect(unstructured.SetNestedField(infrastructureMachineTemplate2WithChanges.Object, "foo", "spec", "template", "spec", "foo")).To(Succeed()) md2WithRotatedInfrastructureMachineTemplate := newFakeMachineDeploymentTopologyState("md-2", infrastructureMachineTemplate2WithChanges, bootstrapTemplate2, nil) + upgradeTrackerWithMD2PendingUpgrade := scope.NewUpgradeTracker() + upgradeTrackerWithMD2PendingUpgrade.MachineDeployments.MarkPendingUpgrade("md-2") infrastructureMachineTemplate3 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-3").Build() bootstrapTemplate3 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-3").Build() @@ -1725,6 +1743,7 @@ func TestReconcileMachineDeployments(t *testing.T) { name string current []*scope.MachineDeploymentState desired []*scope.MachineDeploymentState + upgradeTracker *scope.UpgradeTracker want []*scope.MachineDeploymentState wantInfrastructureMachineTemplateRotation map[string]bool wantBootstrapTemplateRotation map[string]bool @@ -1752,6 +1771,15 @@ func TestReconcileMachineDeployments(t *testing.T) { wantInfrastructureMachineTemplateRotation: map[string]bool{"md-2": true}, wantErr: false, }, + { + name: "Should not update MachineDeployment if MachineDeployment is pending upgrade", + current: []*scope.MachineDeploymentState{md2}, + desired: []*scope.MachineDeploymentState{md2WithRotatedInfrastructureMachineTemplate}, + upgradeTracker: upgradeTrackerWithMD2PendingUpgrade, + want: []*scope.MachineDeploymentState{md2}, + wantInfrastructureMachineTemplateRotation: map[string]bool{"md-2": false}, + wantErr: false, + }, { name: "Should update MachineDeployment with BootstrapTemplate rotation", current: []*scope.MachineDeploymentState{md3}, @@ -1849,6 +1877,10 @@ func TestReconcileMachineDeployments(t *testing.T) { s.Desired = &scope.ClusterState{MachineDeployments: toMachineDeploymentTopologyStateMap(tt.desired)} + if tt.upgradeTracker != nil { + s.UpgradeTracker = tt.upgradeTracker + } + r := Reconciler{ Client: env, patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache()), diff --git a/internal/controllers/topology/cluster/scope/upgradetracker.go b/internal/controllers/topology/cluster/scope/upgradetracker.go index 7bd66d658397..247a3915986d 100644 --- a/internal/controllers/topology/cluster/scope/upgradetracker.go +++ b/internal/controllers/topology/cluster/scope/upgradetracker.go @@ -41,6 +41,12 @@ type ControlPlaneUpgradeTracker struct { IsScaling bool } +// HoldReconcile returns true if the control plane reconciliation needs to be held back. +// Example: Control Plane reconciliation needs to be held back if it is pending an upgrade. +func (c *ControlPlaneUpgradeTracker) HoldReconcile() bool { + return c.PendingUpgrade +} + // MachineDeploymentUpgradeTracker holds the current upgrade status and makes upgrade // decisions for MachineDeployments. type MachineDeploymentUpgradeTracker struct { @@ -181,3 +187,9 @@ func (m *MachineDeploymentUpgradeTracker) DeferredUpgradeNames() []string { func (m *MachineDeploymentUpgradeTracker) DeferredUpgrade() bool { return len(m.deferredNames) != 0 } + +// HoldReconcile returns true if the MachineDeployment reconciliation needs to be held back. +// Example: MachineDeployment reconciliation needs to be held back if it is pending an upgrade. +func (m *MachineDeploymentUpgradeTracker) HoldReconcile(name string) bool { + return m.pendingNames.Has(name) +} diff --git a/internal/controllers/topology/cluster/scope/upgradetracker_test.go b/internal/controllers/topology/cluster/scope/upgradetracker_test.go index 49dfcb490c65..fd9e9a26c0b7 100644 --- a/internal/controllers/topology/cluster/scope/upgradetracker_test.go +++ b/internal/controllers/topology/cluster/scope/upgradetracker_test.go @@ -53,3 +53,66 @@ func TestNewUpgradeTracker(t *testing.T) { } }) } + +func TestUpgradeTrackerHoldReconcile(t *testing.T) { + t.Run("should return the correct value for ControlPlaneUpgradeTracker", func(t *testing.T) { + tests := []struct { + name string + pendingUpgrade bool + want bool + }{ + { + name: "should return true if the control plane is pending an upgrade", + pendingUpgrade: true, + want: true, + }, + { + name: "should return false if the control plane is not pending as upgrade", + pendingUpgrade: false, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + tracker := NewUpgradeTracker() + tracker.ControlPlane.PendingUpgrade = tt.pendingUpgrade + g.Expect(tracker.ControlPlane.HoldReconcile()).To(Equal(tt.want)) + }) + } + }) + + t.Run("should return the correct value for MachineDeploymentUpgradeTracker", func(t *testing.T) { + tests := []struct { + name string + pendingNames []string + targetMD string + want bool + }{ + { + name: "should return true if the machine deployment is pending an upgrade", + pendingNames: []string{"md1", "md2"}, + targetMD: "md1", + want: true, + }, + { + name: "should return false if the machine deployment is not pending an upgrade", + pendingNames: []string{"md1", "md2"}, + targetMD: "md3", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + tracker := NewUpgradeTracker() + for _, name := range tt.pendingNames { + tracker.MachineDeployments.MarkPendingUpgrade(name) + } + g.Expect(tracker.MachineDeployments.HoldReconcile(tt.targetMD)).To(Equal(tt.want)) + }) + } + }) +}