From 4d33ed1ebe695f9eb4a2578d429c2afe7d48c968 Mon Sep 17 00:00:00 2001 From: calvix Date: Thu, 23 Nov 2023 12:15:45 +0100 Subject: [PATCH] Update ASG - do not set desired value for machinepool which have externally managed replicas --- exp/controllers/awsmachinepool_controller.go | 5 +- .../awsmachinepool_controller_test.go | 4 +- pkg/cloud/scope/machinepool.go | 20 -------- .../services/autoscaling/autoscalinggroup.go | 29 +++++------ .../autoscaling/autoscalinggroup_test.go | 48 +++++++++++++++---- pkg/cloud/services/eks/nodegroup.go | 4 +- 6 files changed, 62 insertions(+), 48 deletions(-) diff --git a/exp/controllers/awsmachinepool_controller.go b/exp/controllers/awsmachinepool_controller.go index a3b2926480..0edf2cbc20 100644 --- a/exp/controllers/awsmachinepool_controller.go +++ b/exp/controllers/awsmachinepool_controller.go @@ -49,6 +49,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/predicates" ) @@ -275,7 +276,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP return nil } - if scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) { + if annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { // Set MachinePool replicas to the ASG DesiredCapacity if *machinePoolScope.MachinePool.Spec.Replicas != *asg.DesiredCapacity { machinePoolScope.Info("Setting MachinePool replicas to ASG DesiredCapacity", @@ -503,7 +504,7 @@ func (r *AWSMachinePoolReconciler) findASG(machinePoolScope *scope.MachinePoolSc func diffASG(machinePoolScope *scope.MachinePoolScope, existingASG *expinfrav1.AutoScalingGroup) string { detectedMachinePoolSpec := machinePoolScope.MachinePool.Spec.DeepCopy() - if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) { + if !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { detectedMachinePoolSpec.Replicas = existingASG.DesiredCapacity } if diff := cmp.Diff(machinePoolScope.MachinePool.Spec, *detectedMachinePoolSpec); diff != "" { diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index 4a459bf707..829df43404 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -380,7 +380,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() ms.MachinePool.Annotations = map[string]string{ - scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue, + clusterv1.ReplicasManagedByAnnotation: "somehow-externally-managed", } ms.MachinePool.Spec.Replicas = pointer.Int32(0) @@ -908,7 +908,7 @@ func TestDiffASG(t *testing.T) { MachinePool: &expclusterv1.MachinePool{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue, + clusterv1.ReplicasManagedByAnnotation: "", // empty value counts as true (= externally managed) }, }, Spec: expclusterv1.MachinePoolSpec{ diff --git a/pkg/cloud/scope/machinepool.go b/pkg/cloud/scope/machinepool.go index b8636e00da..1927f06d14 100644 --- a/pkg/cloud/scope/machinepool.go +++ b/pkg/cloud/scope/machinepool.go @@ -43,21 +43,6 @@ import ( "sigs.k8s.io/cluster-api/util/patch" ) -const ( - // ReplicasManagedByAnnotation is an annotation that indicates external (non-Cluster API) management of infra scaling. - // The practical effect of this is that the capi "replica" count is derived from the number of observed infra machines, - // instead of being a source of truth for eventual consistency. - // - // N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is meged. - ReplicasManagedByAnnotation = "cluster.x-k8s.io/replicas-managed-by" - - // ExternalAutoscalerReplicasManagedByAnnotationValue is used with the "cluster.x-k8s.io/replicas-managed-by" annotation - // to indicate an external autoscaler enforces replica count. - // - // N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is meged. - ExternalAutoscalerReplicasManagedByAnnotationValue = "external-autoscaler" -) - // MachinePoolScope defines a scope defined around a machine and its cluster. type MachinePoolScope struct { logger.Logger @@ -404,8 +389,3 @@ func (m *MachinePoolScope) LaunchTemplateName() string { func (m *MachinePoolScope) GetRuntimeObject() runtime.Object { return m.AWSMachinePool } - -func ReplicasExternallyManaged(mp *expclusterv1.MachinePool) bool { - val, ok := mp.Annotations[ReplicasManagedByAnnotation] - return ok && val == ExternalAutoscalerReplicasManagedByAnnotationValue -} diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup.go b/pkg/cloud/services/autoscaling/autoscalinggroup.go index fb471c7d0f..44dfe6bbfd 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup.go @@ -34,6 +34,7 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/converters" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record" + "sigs.k8s.io/cluster-api/util/annotations" ) // SDKToAutoScalingGroup converts an AWS EC2 SDK AutoScalingGroup to the CAPA AutoScalingGroup type. @@ -46,7 +47,7 @@ func (s *Service) SDKToAutoScalingGroup(v *autoscaling.Group) (*expinfrav1.AutoS MaxSize: int32(aws.Int64Value(v.MaxSize)), MinSize: int32(aws.Int64Value(v.MinSize)), CapacityRebalance: aws.BoolValue(v.CapacityRebalance), - //TODO: determine what additional values go here and what else should be in the struct + // TODO: determine what additional values go here and what else should be in the struct } if v.VPCZoneIdentifier != nil { @@ -177,7 +178,7 @@ func (s *Service) CreateASG(machinePoolScope *scope.MachinePoolScope) (*expinfra // Ignore the problem for externally managed clusters because MachinePool replicas will be updated to the right value automatically. if mpReplicas >= machinePoolScope.AWSMachinePool.Spec.MinSize && mpReplicas <= machinePoolScope.AWSMachinePool.Spec.MaxSize { input.DesiredCapacity = &mpReplicas - } else if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) { + } else if !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { return nil, fmt.Errorf("incorrect number of replicas %d in MachinePool %v", mpReplicas, machinePoolScope.MachinePool.Name) } @@ -284,35 +285,35 @@ func (s *Service) DeleteASG(name string) error { } // UpdateASG will update the ASG of a service. -func (s *Service) UpdateASG(scope *scope.MachinePoolScope) error { - subnetIDs, err := s.SubnetIDs(scope) +func (s *Service) UpdateASG(machinePoolScope *scope.MachinePoolScope) error { + subnetIDs, err := s.SubnetIDs(machinePoolScope) if err != nil { return fmt.Errorf("getting subnets for ASG: %w", err) } input := &autoscaling.UpdateAutoScalingGroupInput{ - AutoScalingGroupName: aws.String(scope.Name()), //TODO: define dynamically - borrow logic from ec2 - MaxSize: aws.Int64(int64(scope.AWSMachinePool.Spec.MaxSize)), - MinSize: aws.Int64(int64(scope.AWSMachinePool.Spec.MinSize)), + AutoScalingGroupName: aws.String(machinePoolScope.Name()), // TODO: define dynamically - borrow logic from ec2 + MaxSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MaxSize)), + MinSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MinSize)), VPCZoneIdentifier: aws.String(strings.Join(subnetIDs, ",")), - CapacityRebalance: aws.Bool(scope.AWSMachinePool.Spec.CapacityRebalance), + CapacityRebalance: aws.Bool(machinePoolScope.AWSMachinePool.Spec.CapacityRebalance), } - if scope.MachinePool.Spec.Replicas != nil { - input.DesiredCapacity = aws.Int64(int64(*scope.MachinePool.Spec.Replicas)) + if machinePoolScope.MachinePool.Spec.Replicas != nil && !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { + input.DesiredCapacity = aws.Int64(int64(*machinePoolScope.MachinePool.Spec.Replicas)) } - if scope.AWSMachinePool.Spec.MixedInstancesPolicy != nil { - input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(scope.Name(), scope.AWSMachinePool.Spec.MixedInstancesPolicy) + if machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy != nil { + input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(machinePoolScope.Name(), machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy) } else { input.LaunchTemplate = &autoscaling.LaunchTemplateSpecification{ - LaunchTemplateId: aws.String(scope.AWSMachinePool.Status.LaunchTemplateID), + LaunchTemplateId: aws.String(machinePoolScope.AWSMachinePool.Status.LaunchTemplateID), Version: aws.String(expinfrav1.LaunchTemplateLatestVersion), } } if _, err := s.ASGClient.UpdateAutoScalingGroupWithContext(context.TODO(), input); err != nil { - return errors.Wrapf(err, "failed to update ASG %q", scope.Name()) + return errors.Wrapf(err, "failed to update ASG %q", machinePoolScope.Name()) } return nil diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go index 452fe6f777..f6dac23a80 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go @@ -30,6 +30,7 @@ import ( . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -570,7 +571,7 @@ func TestServiceCreateASG(t *testing.T) { mps.AWSMachinePool.Spec.MaxSize = 5 mps.MachinePool.Spec.Replicas = aws.Int32(1) mps.MachinePool.Annotations = map[string]string{ - scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue, + clusterv1.ReplicasManagedByAnnotation: "", // empty value counts as true (= externally managed) } }, wantErr: false, @@ -592,7 +593,7 @@ func TestServiceCreateASG(t *testing.T) { mps.AWSMachinePool.Spec.MaxSize = 5 mps.MachinePool.Spec.Replicas = aws.Int32(6) mps.MachinePool.Annotations = map[string]string{ - scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue, + clusterv1.ReplicasManagedByAnnotation: "truthy", } }, wantErr: false, @@ -699,17 +700,26 @@ func TestServiceUpdateASG(t *testing.T) { machinePoolName string setupMachinePoolScope func(*scope.MachinePoolScope) wantErr bool - expect func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) + expect func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) }{ { name: "should return without error if update ASG is successful", machinePoolName: "update-asg-success", wantErr: false, setupMachinePoolScope: func(mps *scope.MachinePoolScope) { - mps.AWSMachinePool.Spec.Subnets = nil + mps.MachinePool.Spec.Replicas = pointer.Int32(3) + mps.AWSMachinePool.Spec.MinSize = 2 + mps.AWSMachinePool.Spec.MaxSize = 5 }, - expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) { - m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).Return(&autoscaling.UpdateAutoScalingGroupOutput{}, nil) + expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) { + m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).DoAndReturn(func(ctx context.Context, input *autoscaling.UpdateAutoScalingGroupInput, options ...request.Option) (*autoscaling.UpdateAutoScalingGroupOutput, error) { + // CAPA should set min/max, and because there's no "externally managed" annotation, also the + // "desired" number of instances + g.Expect(input.MinSize).To(BeComparableTo(pointer.Int64(2))) + g.Expect(input.MaxSize).To(BeComparableTo(pointer.Int64(5))) + g.Expect(input.DesiredCapacity).To(BeComparableTo(pointer.Int64(3))) + return &autoscaling.UpdateAutoScalingGroupOutput{}, nil + }) }, }, { @@ -719,10 +729,31 @@ func TestServiceUpdateASG(t *testing.T) { setupMachinePoolScope: func(mps *scope.MachinePoolScope) { mps.AWSMachinePool.Spec.MixedInstancesPolicy = nil }, - expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) { + expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) { m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).Return(nil, awserrors.NewFailedDependency("dependency failure")) }, }, + { + name: "externally managed replicas annotation", + machinePoolName: "update-asg-externally-managed-replicas-annotation", + wantErr: false, + setupMachinePoolScope: func(mps *scope.MachinePoolScope) { + mps.MachinePool.SetAnnotations(map[string]string{clusterv1.ReplicasManagedByAnnotation: "anything-that-is-not-false"}) + + mps.MachinePool.Spec.Replicas = pointer.Int32(40) + mps.AWSMachinePool.Spec.MinSize = 20 + mps.AWSMachinePool.Spec.MaxSize = 50 + }, + expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) { + m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).DoAndReturn(func(ctx context.Context, input *autoscaling.UpdateAutoScalingGroupInput, options ...request.Option) (*autoscaling.UpdateAutoScalingGroupOutput, error) { + // CAPA should set min/max, but not the externally managed "desired" number of instances + g.Expect(input.MinSize).To(BeComparableTo(pointer.Int64(20))) + g.Expect(input.MaxSize).To(BeComparableTo(pointer.Int64(50))) + g.Expect(input.DesiredCapacity).To(BeNil()) + return &autoscaling.UpdateAutoScalingGroupOutput{}, nil + }) + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -733,13 +764,14 @@ func TestServiceUpdateASG(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) ec2Mock := mocks.NewMockEC2API(mockCtrl) asgMock := mock_autoscalingiface.NewMockAutoScalingAPI(mockCtrl) - tt.expect(ec2Mock.EXPECT(), asgMock.EXPECT()) + tt.expect(ec2Mock.EXPECT(), asgMock.EXPECT(), g) s := NewService(clusterScope) s.ASGClient = asgMock mps, err := getMachinePoolScope(fakeClient, clusterScope) g.Expect(err).ToNot(HaveOccurred()) mps.AWSMachinePool.Name = tt.machinePoolName + tt.setupMachinePoolScope(mps) err = s.UpdateASG(mps) checkErr(tt.wantErr, err, g) diff --git a/pkg/cloud/services/eks/nodegroup.go b/pkg/cloud/services/eks/nodegroup.go index cdf0a6ca9f..ec24f16fc3 100644 --- a/pkg/cloud/services/eks/nodegroup.go +++ b/pkg/cloud/services/eks/nodegroup.go @@ -34,10 +34,10 @@ import ( expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/converters" - "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/wait" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/annotations" ) func (s *NodegroupService) describeNodegroup() (*eks.Nodegroup, error) { @@ -533,7 +533,7 @@ func (s *NodegroupService) reconcileNodegroup(ctx context.Context) error { break } - if scope.ReplicasExternallyManaged(s.scope.MachinePool) { + if annotations.ReplicasManagedByExternalAutoscaler(s.scope.MachinePool) { // Set MachinePool replicas to the node group DesiredCapacity ngDesiredCapacity := int32(aws.Int64Value(ng.ScalingConfig.DesiredSize)) if *s.scope.MachinePool.Spec.Replicas != ngDesiredCapacity {