Skip to content

Commit

Permalink
topology controller should avoid unnecessary rollouts during upgrades
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuvaraj Kakaraparthi committed Jun 2, 2023
1 parent bb287a1 commit 13d456d
Show file tree
Hide file tree
Showing 10 changed files with 659 additions and 375 deletions.
5 changes: 5 additions & 0 deletions api/v1beta1/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,11 @@ const (
// not yet completed because Control Plane is not yet updated to match the desired topology spec.
TopologyReconciledControlPlaneUpgradePendingReason = "ControlPlaneUpgradePending"

// TopologyReconciledMachineDeploymentsCreatePendingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments is yet to be created.
// This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable.
TopologyReconciledMachineDeploymentsCreatePendingReason = "MachineDeploymentsCreatePending"

// TopologyReconciledMachineDeploymentsUpgradePendingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec.
TopologyReconciledMachineDeploymentsUpgradePendingReason = "MachineDeploymentsUpgradePending"
Expand Down
46 changes: 30 additions & 16 deletions internal/controllers/topology/cluster/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
)
return nil
}

// If an error occurred during reconciliation set the TopologyReconciled condition to false.
// Add the error message from the reconcile function to the message of the condition.
if reconcileErr != nil {
Expand Down Expand Up @@ -108,25 +109,37 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
// * either the Control Plane or any of the MachineDeployments are still pending to pick up the new version
// (generally happens when upgrading the cluster)
// * when there are MachineDeployments for which the upgrade has been deferred
if s.UpgradeTracker.ControlPlane.PendingUpgrade ||
s.UpgradeTracker.MachineDeployments.PendingUpgrade() ||
// * when new MachineDeployments are pending to be created
// (generally happens when upgrading the cluster)
if s.UpgradeTracker.ControlPlane.IsPendingUpgrade ||
s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate() ||
s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade() ||
s.UpgradeTracker.MachineDeployments.DeferredUpgrade() {
msgBuilder := &strings.Builder{}
var reason string

// TODO(ykakarap): Evaluate potential improvements to building the condition. Multiple causes can trigger the
// condition to be false at the same time (Example: ControlPlane.IsPendingUpgrade and MachineDeployments.IsAnyPendingCreate can
// occur at the same time). Find better wording and `Reason` for the condition so that the condition can be rich
// with all the relevant information.
switch {
case s.UpgradeTracker.ControlPlane.PendingUpgrade:
fmt.Fprintf(msgBuilder, "Control plane upgrade to %s on hold.", s.Blueprint.Topology.Version)
case s.UpgradeTracker.ControlPlane.IsPendingUpgrade:
fmt.Fprintf(msgBuilder, "Control plane rollout and upgrade to version %s on hold.", s.Blueprint.Topology.Version)
reason = clusterv1.TopologyReconciledControlPlaneUpgradePendingReason
case s.UpgradeTracker.MachineDeployments.PendingUpgrade():
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s upgrade to version %s on hold.",
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()),
case s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade():
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s on hold.",
computeNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason
case s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate():
fmt.Fprintf(msgBuilder, "MachineDeployment(s) for Topologies %s creation on hold.",
computeNameList(s.UpgradeTracker.MachineDeployments.PendingCreateTopologyNames()),
)
reason = clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason
case s.UpgradeTracker.MachineDeployments.DeferredUpgrade():
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s upgrade to version %s deferred.",
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()),
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s deferred.",
computeNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason
Expand All @@ -148,7 +161,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste

case len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) > 0:
fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are upgrading",
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()),
computeNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()),
)
}

Expand All @@ -175,12 +188,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
return nil
}

// computeMachineDeploymentNameList computes the MD name list to be shown in conditions.
// It shortens the list to at most 5 MachineDeployment names.
func computeMachineDeploymentNameList(mdList []string) any {
if len(mdList) > 5 {
mdList = append(mdList[:5], "...")
// computeNameList computes list of names form the given list to be shown in conditions.
// It shortens the list to at most 5 names and adds an ellipsis at the end if the list
// has more than 5 elements.
func computeNameList(list []string) any {
if len(list) > 5 {
list = append(list[:5], "...")
}

return strings.Join(mdList, ", ")
return strings.Join(list, ", ")
}
72 changes: 52 additions & 20 deletions internal/controllers/topology/cluster/conditions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = true
ut.ControlPlane.IsPendingUpgrade = true
ut.ControlPlane.IsProvisioning = true
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is completing initial provisioning",
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is completing initial provisioning",
},
{
name: "should set the condition to false if new version is not picked up because control plane is upgrading",
Expand All @@ -154,15 +154,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = true
ut.ControlPlane.IsPendingUpgrade = true
ut.ControlPlane.IsUpgrading = true
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is upgrading to version v1.21.2",
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.21.2",
},
{
name: "should set the condition to false if new version is not picked up because control plane is scaling",
Expand All @@ -185,15 +185,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = true
ut.ControlPlane.IsPendingUpgrade = true
ut.ControlPlane.IsScaling = true
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is reconciling desired replicas",
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas",
},
{
name: "should set the condition to false if new version is not picked up because at least one of the machine deployment is upgrading",
Expand Down Expand Up @@ -230,15 +230,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = true
ut.ControlPlane.IsPendingUpgrade = true
ut.MachineDeployments.MarkUpgrading("md0-abc123")
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
},
{
name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is upgrading",
Expand Down Expand Up @@ -275,7 +275,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.ControlPlane.IsPendingUpgrade = false
ut.ControlPlane.IsUpgrading = true
ut.MachineDeployments.MarkPendingUpgrade("md0-abc123")
return ut
Expand All @@ -284,7 +284,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
wantConditionMessage: "MachineDeployment(s) md0-abc123 upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0",
wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0",
},
{
name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is scaling",
Expand Down Expand Up @@ -321,7 +321,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.ControlPlane.IsPendingUpgrade = false
ut.ControlPlane.IsScaling = true
ut.MachineDeployments.MarkPendingUpgrade("md0-abc123")
return ut
Expand All @@ -330,7 +330,39 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
wantConditionMessage: "MachineDeployment(s) md0-abc123 upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas",
wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas",
},
{
name: "should set the condition to false if control plane picked the new version but there are machine deployments pending create because control plane is scaling",
reconcileErr: nil,
cluster: &clusterv1.Cluster{},
s: &scope.Scope{
Blueprint: &scope.ClusterBlueprint{
Topology: &clusterv1.Topology{
Version: "v1.22.0",
},
},
Current: &scope.ClusterState{
Cluster: &clusterv1.Cluster{},
ControlPlane: &scope.ControlPlaneState{
Object: builder.ControlPlane("ns1", "controlplane1").
WithVersion("v1.22.0").
WithReplicas(3).
Build(),
},
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.IsPendingUpgrade = false
ut.ControlPlane.IsScaling = true
ut.MachineDeployments.MarkPendingCreate("md0")
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason,
wantConditionMessage: "MachineDeployment(s) for Topologies md0 creation on hold. Control plane is reconciling desired replicas",
},
{
name: "should set the condition to true if control plane picked the new version and is upgrading but there are no machine deployments",
Expand All @@ -353,7 +385,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.ControlPlane.IsPendingUpgrade = false
ut.ControlPlane.IsUpgrading = true
return ut
}(),
Expand Down Expand Up @@ -382,7 +414,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.ControlPlane.IsPendingUpgrade = false
ut.ControlPlane.IsScaling = true
return ut
}(),
Expand Down Expand Up @@ -450,7 +482,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.ControlPlane.IsPendingUpgrade = false
ut.MachineDeployments.MarkUpgrading("md0-abc123")
ut.MachineDeployments.MarkPendingUpgrade("md1-abc123")
return ut
Expand All @@ -469,7 +501,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
},
{
name: "should set the condition to false if some machine deployments have not picked the new version because their upgrade has been deferred",
Expand Down Expand Up @@ -520,15 +552,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.ControlPlane.IsPendingUpgrade = false
ut.MachineDeployments.MarkDeferredUpgrade("md1-abc123")
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
},
wantConditionStatus: corev1.ConditionFalse,
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason,
wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 deferred.",
wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 deferred.",
},
{
name: "should set the condition to true if there are no reconcile errors and control plane and all machine deployments picked up the new version",
Expand Down Expand Up @@ -579,7 +611,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
},
UpgradeTracker: func() *scope.UpgradeTracker {
ut := scope.NewUpgradeTracker()
ut.ControlPlane.PendingUpgrade = false
ut.ControlPlane.IsPendingUpgrade = false
return ut
}(),
HookResponseTracker: scope.NewHookResponseTracker(),
Expand Down Expand Up @@ -656,7 +688,7 @@ func TestComputeMachineDeploymentNameList(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
g.Expect(computeMachineDeploymentNameList(tt.mdList)).To(Equal(tt.expected))
g.Expect(computeNameList(tt.mdList)).To(Equal(tt.expected))
})
}
}
Loading

0 comments on commit 13d456d

Please sign in to comment.