Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Update ASG - do not set desired value for machinepool which have externally managed replicas #4654

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions exp/controllers/awsmachinepool_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/predicates"
)
Expand Down Expand Up @@ -275,7 +276,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
return nil
}

if scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
if annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
// Set MachinePool replicas to the ASG DesiredCapacity
if *machinePoolScope.MachinePool.Spec.Replicas != *asg.DesiredCapacity {
machinePoolScope.Info("Setting MachinePool replicas to ASG DesiredCapacity",
Expand Down Expand Up @@ -503,7 +504,7 @@ func (r *AWSMachinePoolReconciler) findASG(machinePoolScope *scope.MachinePoolSc
func diffASG(machinePoolScope *scope.MachinePoolScope, existingASG *expinfrav1.AutoScalingGroup) string {
detectedMachinePoolSpec := machinePoolScope.MachinePool.Spec.DeepCopy()

if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
if !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
detectedMachinePoolSpec.Replicas = existingASG.DesiredCapacity
}
if diff := cmp.Diff(machinePoolScope.MachinePool.Spec, *detectedMachinePoolSpec); diff != "" {
Expand Down
4 changes: 2 additions & 2 deletions exp/controllers/awsmachinepool_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
ec2Svc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()

ms.MachinePool.Annotations = map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "somehow-externally-managed",
}
ms.MachinePool.Spec.Replicas = pointer.Int32(0)

Expand Down Expand Up @@ -908,7 +908,7 @@ func TestDiffASG(t *testing.T) {
MachinePool: &expclusterv1.MachinePool{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "", // empty value counts as true (= externally managed)
},
},
Spec: expclusterv1.MachinePoolSpec{
Expand Down
20 changes: 0 additions & 20 deletions pkg/cloud/scope/machinepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,6 @@ import (
"sigs.k8s.io/cluster-api/util/patch"
)

const (
// ReplicasManagedByAnnotation is an annotation that indicates external (non-Cluster API) management of infra scaling.
// The practical effect of this is that the capi "replica" count is derived from the number of observed infra machines,
// instead of being a source of truth for eventual consistency.
//
// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is meged.
ReplicasManagedByAnnotation = "cluster.x-k8s.io/replicas-managed-by"

// ExternalAutoscalerReplicasManagedByAnnotationValue is used with the "cluster.x-k8s.io/replicas-managed-by" annotation
// to indicate an external autoscaler enforces replica count.
//
// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is meged.
ExternalAutoscalerReplicasManagedByAnnotationValue = "external-autoscaler"
)

// MachinePoolScope defines a scope defined around a machine and its cluster.
type MachinePoolScope struct {
logger.Logger
Expand Down Expand Up @@ -404,8 +389,3 @@ func (m *MachinePoolScope) LaunchTemplateName() string {
func (m *MachinePoolScope) GetRuntimeObject() runtime.Object {
return m.AWSMachinePool
}

func ReplicasExternallyManaged(mp *expclusterv1.MachinePool) bool {
val, ok := mp.Annotations[ReplicasManagedByAnnotation]
return ok && val == ExternalAutoscalerReplicasManagedByAnnotationValue
}
29 changes: 15 additions & 14 deletions pkg/cloud/services/autoscaling/autoscalinggroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/converters"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
"sigs.k8s.io/cluster-api/util/annotations"
)

// SDKToAutoScalingGroup converts an AWS EC2 SDK AutoScalingGroup to the CAPA AutoScalingGroup type.
Expand All @@ -46,7 +47,7 @@ func (s *Service) SDKToAutoScalingGroup(v *autoscaling.Group) (*expinfrav1.AutoS
MaxSize: int32(aws.Int64Value(v.MaxSize)),
MinSize: int32(aws.Int64Value(v.MinSize)),
CapacityRebalance: aws.BoolValue(v.CapacityRebalance),
//TODO: determine what additional values go here and what else should be in the struct
// TODO: determine what additional values go here and what else should be in the struct
}

if v.VPCZoneIdentifier != nil {
Expand Down Expand Up @@ -177,7 +178,7 @@ func (s *Service) CreateASG(machinePoolScope *scope.MachinePoolScope) (*expinfra
// Ignore the problem for externally managed clusters because MachinePool replicas will be updated to the right value automatically.
if mpReplicas >= machinePoolScope.AWSMachinePool.Spec.MinSize && mpReplicas <= machinePoolScope.AWSMachinePool.Spec.MaxSize {
input.DesiredCapacity = &mpReplicas
} else if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
} else if !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
return nil, fmt.Errorf("incorrect number of replicas %d in MachinePool %v", mpReplicas, machinePoolScope.MachinePool.Name)
}

Expand Down Expand Up @@ -284,35 +285,35 @@ func (s *Service) DeleteASG(name string) error {
}

// UpdateASG will update the ASG of a service.
func (s *Service) UpdateASG(scope *scope.MachinePoolScope) error {
subnetIDs, err := s.SubnetIDs(scope)
func (s *Service) UpdateASG(machinePoolScope *scope.MachinePoolScope) error {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the renaming of the variable from scope to machinePoolScope is necessary to be able to access the package scope and the function scope.ReplicasExternallyManaged

subnetIDs, err := s.SubnetIDs(machinePoolScope)
if err != nil {
return fmt.Errorf("getting subnets for ASG: %w", err)
}

input := &autoscaling.UpdateAutoScalingGroupInput{
AutoScalingGroupName: aws.String(scope.Name()), //TODO: define dynamically - borrow logic from ec2
MaxSize: aws.Int64(int64(scope.AWSMachinePool.Spec.MaxSize)),
MinSize: aws.Int64(int64(scope.AWSMachinePool.Spec.MinSize)),
AutoScalingGroupName: aws.String(machinePoolScope.Name()), // TODO: define dynamically - borrow logic from ec2
MaxSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MaxSize)),
MinSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MinSize)),
VPCZoneIdentifier: aws.String(strings.Join(subnetIDs, ",")),
CapacityRebalance: aws.Bool(scope.AWSMachinePool.Spec.CapacityRebalance),
CapacityRebalance: aws.Bool(machinePoolScope.AWSMachinePool.Spec.CapacityRebalance),
}

if scope.MachinePool.Spec.Replicas != nil {
input.DesiredCapacity = aws.Int64(int64(*scope.MachinePool.Spec.Replicas))
if machinePoolScope.MachinePool.Spec.Replicas != nil && !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
input.DesiredCapacity = aws.Int64(int64(*machinePoolScope.MachinePool.Spec.Replicas))
}

if scope.AWSMachinePool.Spec.MixedInstancesPolicy != nil {
input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(scope.Name(), scope.AWSMachinePool.Spec.MixedInstancesPolicy)
if machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy != nil {
input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(machinePoolScope.Name(), machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy)
} else {
input.LaunchTemplate = &autoscaling.LaunchTemplateSpecification{
LaunchTemplateId: aws.String(scope.AWSMachinePool.Status.LaunchTemplateID),
LaunchTemplateId: aws.String(machinePoolScope.AWSMachinePool.Status.LaunchTemplateID),
Version: aws.String(expinfrav1.LaunchTemplateLatestVersion),
}
}

if _, err := s.ASGClient.UpdateAutoScalingGroupWithContext(context.TODO(), input); err != nil {
return errors.Wrapf(err, "failed to update ASG %q", scope.Name())
return errors.Wrapf(err, "failed to update ASG %q", machinePoolScope.Name())
}

return nil
Expand Down
48 changes: 40 additions & 8 deletions pkg/cloud/services/autoscaling/autoscalinggroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

Expand Down Expand Up @@ -570,7 +571,7 @@ func TestServiceCreateASG(t *testing.T) {
mps.AWSMachinePool.Spec.MaxSize = 5
mps.MachinePool.Spec.Replicas = aws.Int32(1)
mps.MachinePool.Annotations = map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "", // empty value counts as true (= externally managed)
}
},
wantErr: false,
Expand All @@ -592,7 +593,7 @@ func TestServiceCreateASG(t *testing.T) {
mps.AWSMachinePool.Spec.MaxSize = 5
mps.MachinePool.Spec.Replicas = aws.Int32(6)
mps.MachinePool.Annotations = map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "truthy",
}
},
wantErr: false,
Expand Down Expand Up @@ -699,17 +700,26 @@ func TestServiceUpdateASG(t *testing.T) {
machinePoolName string
setupMachinePoolScope func(*scope.MachinePoolScope)
wantErr bool
expect func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder)
expect func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT)
}{
{
name: "should return without error if update ASG is successful",
machinePoolName: "update-asg-success",
wantErr: false,
setupMachinePoolScope: func(mps *scope.MachinePoolScope) {
mps.AWSMachinePool.Spec.Subnets = nil
mps.MachinePool.Spec.Replicas = pointer.Int32(3)
mps.AWSMachinePool.Spec.MinSize = 2
mps.AWSMachinePool.Spec.MaxSize = 5
},
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).Return(&autoscaling.UpdateAutoScalingGroupOutput{}, nil)
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).DoAndReturn(func(ctx context.Context, input *autoscaling.UpdateAutoScalingGroupInput, options ...request.Option) (*autoscaling.UpdateAutoScalingGroupOutput, error) {
// CAPA should set min/max, and because there's no "externally managed" annotation, also the
// "desired" number of instances
g.Expect(input.MinSize).To(BeComparableTo(pointer.Int64(2)))
g.Expect(input.MaxSize).To(BeComparableTo(pointer.Int64(5)))
g.Expect(input.DesiredCapacity).To(BeComparableTo(pointer.Int64(3)))
return &autoscaling.UpdateAutoScalingGroupOutput{}, nil
})
},
},
{
Expand All @@ -719,10 +729,31 @@ func TestServiceUpdateASG(t *testing.T) {
setupMachinePoolScope: func(mps *scope.MachinePoolScope) {
mps.AWSMachinePool.Spec.MixedInstancesPolicy = nil
},
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) {
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).Return(nil, awserrors.NewFailedDependency("dependency failure"))
},
},
{
name: "externally managed replicas annotation",
machinePoolName: "update-asg-externally-managed-replicas-annotation",
wantErr: false,
setupMachinePoolScope: func(mps *scope.MachinePoolScope) {
mps.MachinePool.SetAnnotations(map[string]string{clusterv1.ReplicasManagedByAnnotation: "anything-that-is-not-false"})

mps.MachinePool.Spec.Replicas = pointer.Int32(40)
mps.AWSMachinePool.Spec.MinSize = 20
mps.AWSMachinePool.Spec.MaxSize = 50
},
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).DoAndReturn(func(ctx context.Context, input *autoscaling.UpdateAutoScalingGroupInput, options ...request.Option) (*autoscaling.UpdateAutoScalingGroupOutput, error) {
// CAPA should set min/max, but not the externally managed "desired" number of instances
g.Expect(input.MinSize).To(BeComparableTo(pointer.Int64(20)))
g.Expect(input.MaxSize).To(BeComparableTo(pointer.Int64(50)))
g.Expect(input.DesiredCapacity).To(BeNil())
return &autoscaling.UpdateAutoScalingGroupOutput{}, nil
})
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -733,13 +764,14 @@ func TestServiceUpdateASG(t *testing.T) {
g.Expect(err).ToNot(HaveOccurred())
ec2Mock := mocks.NewMockEC2API(mockCtrl)
asgMock := mock_autoscalingiface.NewMockAutoScalingAPI(mockCtrl)
tt.expect(ec2Mock.EXPECT(), asgMock.EXPECT())
tt.expect(ec2Mock.EXPECT(), asgMock.EXPECT(), g)
s := NewService(clusterScope)
s.ASGClient = asgMock

mps, err := getMachinePoolScope(fakeClient, clusterScope)
g.Expect(err).ToNot(HaveOccurred())
mps.AWSMachinePool.Name = tt.machinePoolName
tt.setupMachinePoolScope(mps)

err = s.UpdateASG(mps)
checkErr(tt.wantErr, err, g)
Expand Down
4 changes: 2 additions & 2 deletions pkg/cloud/services/eks/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ import (
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/converters"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/wait"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/annotations"
)

func (s *NodegroupService) describeNodegroup() (*eks.Nodegroup, error) {
Expand Down Expand Up @@ -533,7 +533,7 @@ func (s *NodegroupService) reconcileNodegroup(ctx context.Context) error {
break
}

if scope.ReplicasExternallyManaged(s.scope.MachinePool) {
if annotations.ReplicasManagedByExternalAutoscaler(s.scope.MachinePool) {
// Set MachinePool replicas to the node group DesiredCapacity
ngDesiredCapacity := int32(aws.Int64Value(ng.ScalingConfig.DesiredSize))
if *s.scope.MachinePool.Spec.Replicas != ngDesiredCapacity {
Expand Down