Skip to content

Commit

Permalink
Improve resiliency of size tagging when hostedcluster KAS down
Browse files Browse the repository at this point in the history
When the kube apiserver of a hosted cluster is not available, the
replica status of nodepools will not be accurate because the CAPI
controllers can no longer get node counts from the API server. This
commit improves the handling of this situation with 2 changes:
- Switches to use .spec.replicas to determine node count of nodepools
  that do not have autoscaling turned on.
- Once a hosted cluster has been tagged with a size, it is only
  allowed to move to a different size if the kube apiserver of the
  hosted cluster is available.
  • Loading branch information
csrwng committed May 14, 2024
1 parent 6bf4672 commit 95b8a59
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/openshift/hypershift/support/releaseinfo"
hyperutil "github.com/openshift/hypershift/support/util"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
Expand Down Expand Up @@ -216,7 +217,9 @@ func (r *reconciler) reconcile(

// first, we figure out the node count for the hosted cluster
var nodeCount uint32
nodeCountRequiresAPIServer := false
if hccoReportsNodeCount {
nodeCountRequiresAPIServer = true
hostedControlPlane, err := r.hostedControlPlaneForHostedCluster(ctx, hostedCluster)
if err != nil {
return nil, nil
Expand All @@ -232,7 +235,25 @@ func (r *reconciler) reconcile(
}

for _, nodePool := range nodePools.Items {
nodeCount += uint32(nodePool.Status.Replicas)
var replicas uint32
// If autoscaling, the replicas should be returned from status
if nodePool.Spec.AutoScaling != nil {
nodeCountRequiresAPIServer = true
replicas = uint32(nodePool.Status.Replicas)
} else if nodePool.Spec.Replicas != nil {
replicas = uint32(*nodePool.Spec.Replicas)
}
nodeCount += replicas
}
}

if sizeClassLabelPresent && nodeCountRequiresAPIServer {
// If already assigned a size, we cannot further transition to other sizes if the kube apiserver is not available
// and it's required to be available for node count calculation
kasAvailableCondition := meta.FindStatusCondition(hostedCluster.Status.Conditions, string(hypershiftv1beta1.KubeAPIServerAvailable))
if kasAvailableCondition == nil || kasAvailableCondition.Status != metav1.ConditionTrue {
logger.Info("HostedCluster kube apiserver is not available, skipping sizing reconciliation")
return nil, nil
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ func TestSizingController_Reconcile(t *testing.T) {
},
nodePoolsForHostedCluster: func(_ context.Context, _ *hypershiftv1beta1.HostedCluster) (*hypershiftv1beta1.NodePoolList, error) {
return &hypershiftv1beta1.NodePoolList{Items: []hypershiftv1beta1.NodePool{
{Status: hypershiftv1beta1.NodePoolStatus{Replicas: 10}},
{Status: hypershiftv1beta1.NodePoolStatus{Replicas: 3}},
{Status: hypershiftv1beta1.NodePoolStatus{Replicas: 17}},
{Spec: hypershiftv1beta1.NodePoolSpec{Replicas: ptr.To[int32](10)}},
{Spec: hypershiftv1beta1.NodePoolSpec{Replicas: ptr.To[int32](3)}},
{Spec: hypershiftv1beta1.NodePoolSpec{AutoScaling: &hypershiftv1beta1.NodePoolAutoScaling{Min: 1, Max: 20}}, Status: hypershiftv1beta1.NodePoolStatus{Replicas: 17}},
}}, nil
},
expected: &action{applyCfg: &hypershiftv1beta1applyconfigurations.HostedClusterApplyConfiguration{
Expand Down

0 comments on commit 95b8a59

Please sign in to comment.