From e9dd2ce99962292fbade517202d8f17dec6e3d2f Mon Sep 17 00:00:00 2001
From: aymericDD
Date: Wed, 20 Sep 2023 09:40:48 +0200
Subject: [PATCH] CHAOS-232: Moving chaos pod logic from the chaos controller
 to a dedicated service (#764)

* Reducing the responsibility of the disruption controller by moving the logic of chaos pods into a dedicated service called ChaosPodService.
* Refactoring the cloud service manager to be able to mock it.
* refactor: use context from the controller
* fix: chaos pod service unit test + wrong behavior
* refactor: wrong usage of log labels

Jira: CHAOS-232
---
 .vendor.mockery.yaml                          |    8 +
 api/v1beta1/disk_failure_test.go              |    2 +-
 api/v1beta1/disruption_types.go               |  140 ++
 api/v1beta1/disruption_types_test.go          |  315 +++-
 api/v1beta1/disruption_webhook.go             |    2 +-
 api/v1beta1/disruption_webhook_test.go        |   10 +-
 api/v1beta1/network_disruption.go             |   35 +
 api/v1beta1/validations.go                    |    8 +
 builderstest/chaospod.go                      |  338 ++++
 builderstest/disruption.go                    |  236 +++
 builderstest/pod.go                           |  100 ++
 .../cloud_services_providers_manager_mock.go  |  288 ++++
 cloudservice/manager.go                       |   82 +-
 cloudservice/manager_test.go                  |  290 +++-
 controllers/disruption_controller.go          |  723 ++------
 controllers/helpers.go                        |  291 ----
 controllers/helpers_test.go                   |  369 -----
 controllers/suite_toolsfor_test.go            |    9 +-
 main.go                                       |   64 +-
 mocks/client.go                               |  630 +++++++
 mocks/round_tripper.go                        |   95 ++
 services/chaospod.go                          |  642 +++++++
 services/chaospod_test.go                     | 1476 +++++++++++++++++
 services/suite_test.go                        |   29 +
 targetselector/target_selector.go             |   21 +-
 targetselector/target_selector_test.go        |   42 +
 utils/utils.go                                |    2 +-
 watchers/disruptions_watchers_manager_test.go |   10 +-
 watchers/factory_test.go                      |    2 +-
 watchers/manager_test.go                      |    2 +-
 watchers/watcher_test.go                      |    8 +-
 31 files changed, 4883 insertions(+), 1386 deletions(-)
 create mode 100644 builderstest/chaospod.go
 create mode 100644 builderstest/disruption.go
 create mode 100644 builderstest/pod.go
 create mode 100644 cloudservice/cloud_services_providers_manager_mock.go
 delete mode 100644 controllers/helpers.go
 delete mode 100644 controllers/helpers_test.go
 create mode 100644 mocks/client.go
 create mode 100644 mocks/round_tripper.go
 create mode 100644 services/chaospod.go
 create mode 100644 services/chaospod_test.go
 create mode 100644 services/suite_test.go
 create mode 100644 targetselector/target_selector_test.go

diff --git a/.vendor.mockery.yaml b/.vendor.mockery.yaml
index 434e47f28..ce3cb73c5 100644
--- a/.vendor.mockery.yaml
+++ b/.vendor.mockery.yaml
@@ -12,6 +12,11 @@ inpackage: False
 # If you wish to mock an interface from the vendor, you need to define both the package and the specific interface you want to mock.
packages: + net/http: + interfaces: + RoundTripper: + config: + mockname: RoundTripperMock sigs.k8s.io/controller-runtime/pkg/controller: interfaces: Controller: @@ -22,6 +27,9 @@ packages: Reader: config: mockname: ReaderMock + Client: + config: + mockname: K8SClientMock k8s.io/client-go/tools/record: interfaces: EventRecorder: diff --git a/api/v1beta1/disk_failure_test.go b/api/v1beta1/disk_failure_test.go index 074697226..0c9c581b1 100644 --- a/api/v1beta1/disk_failure_test.go +++ b/api/v1beta1/disk_failure_test.go @@ -41,7 +41,7 @@ var _ = Describe("DiskFailureSpec", func() { // Assert Expect(err).To(HaveOccurred()) - Expect(err.Error()).Should(Equal(expectedError)) + Expect(err).To(MatchError(expectedError)) }, Entry("with a path exceeding 62 characters", DiskFailureSpec{ diff --git a/api/v1beta1/disruption_types.go b/api/v1beta1/disruption_types.go index 0493adafc..28f0973c1 100644 --- a/api/v1beta1/disruption_types.go +++ b/api/v1beta1/disruption_types.go @@ -24,6 +24,7 @@ import ( "github.com/DataDog/chaos-controller/utils" "github.com/hashicorp/go-multierror" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" @@ -93,6 +94,14 @@ type DisruptionTriggers struct { CreatePods DisruptionTrigger `json:"createPods,omitempty"` } +type TerminationStatus uint8 + +const ( + TSNotTerminated TerminationStatus = iota + TSTemporarilyTerminated + TSDefinitivelyTerminated +) + func (dt DisruptionTriggers) IsZero() bool { return dt.Inject.IsZero() && dt.CreatePods.IsZero() } @@ -248,6 +257,137 @@ type Disruption struct { Status DisruptionStatus `json:"status,omitempty"` } +// TimeToInject calculates the time at which the disruption should be injected based on its own creationTimestamp. +// It considers the specified triggers for injection timing in the disruption's specification. 
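+// For example (values are illustrative), a disruption created at 10:00:00 with
+// spec.triggers.createPods.offset=5m and spec.triggers.inject.offset=2m is
+// injected at 10:07:00, since the inject offset is measured from the time
+// returned by TimeToCreatePods (10:05:00).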
+func (r *Disruption) TimeToInject() time.Time {
+	triggers := r.Spec.Triggers
+
+	if triggers.IsZero() {
+		return r.CreationTimestamp.Time
+	}
+
+	if triggers.Inject.IsZero() {
+		return r.TimeToCreatePods()
+	}
+
+	var notInjectedBefore time.Time
+
+	// validation should have already prevented a situation where both Offset and NotBefore are set
+	if !triggers.Inject.NotBefore.IsZero() {
+		notInjectedBefore = triggers.Inject.NotBefore.Time
+	}
+
+	if triggers.Inject.Offset.Duration() > 0 {
+		// we measure the offset from the later of two timestamps: the disruption's creationTimestamp and spec.triggers.createPods
+		notInjectedBefore = r.TimeToCreatePods().Add(triggers.Inject.Offset.Duration())
+	}
+
+	if r.CreationTimestamp.Time.After(notInjectedBefore) {
+		return r.CreationTimestamp.Time
+	}
+
+	return notInjectedBefore
+}
+
+// TimeToCreatePods takes the DisruptionTriggers field from a Disruption spec, along with the time.Time at which that disruption was created.
+// It returns the earliest time.Time at which the chaos-controller should begin creating chaos pods, given the specified DisruptionTriggers.
+func (r *Disruption) TimeToCreatePods() time.Time {
+	triggers := r.Spec.Triggers
+
+	if triggers.IsZero() {
+		return r.CreationTimestamp.Time
+	}
+
+	if triggers.CreatePods.IsZero() {
+		return r.CreationTimestamp.Time
+	}
+
+	var noPodsBefore time.Time
+
+	// validation should have already prevented a situation where both Offset and NotBefore are set
+	if !triggers.CreatePods.NotBefore.IsZero() {
+		noPodsBefore = triggers.CreatePods.NotBefore.Time
+	}
+
+	if triggers.CreatePods.Offset.Duration() > 0 {
+		noPodsBefore = r.CreationTimestamp.Add(triggers.CreatePods.Offset.Duration())
+	}
+
+	if r.CreationTimestamp.After(noPodsBefore) {
+		return r.CreationTimestamp.Time
+	}
+
+	return noPodsBefore
+}
+
+// RemainingDuration returns the remaining duration of the disruption.
+func (r *Disruption) RemainingDuration() time.Duration {
+	return r.calculateDeadline(
+		r.Spec.Duration.Duration(),
+		r.TimeToInject(),
+	)
+}
+
+func (r *Disruption) calculateDeadline(duration time.Duration, creationTime time.Time) time.Duration {
+	// first we must calculate the timeout from when the disruption was created, not from now
+	timeout := creationTime.Add(duration)
+	now := time.Now() // rather not take the risk that the time changes by a second during this function
+
+	// return the duration between now and the deadline
+	return timeout.Sub(now)
+}
+
+// TerminationStatus determines the termination status of a disruption based on various factors.
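+// The checks below are ordered as follows: a disruption that was never created
+// is TSNotTerminated; an expired duration or a set deletion timestamp is
+// TSDefinitivelyTerminated; with no chaos pods left, the injection status
+// decides (never injected means TSNotTerminated, injected before means
+// TSTemporarilyTerminated); otherwise the disruption is TSTemporarilyTerminated
+// only if every chaos pod container exited with code 0.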
+func (r *Disruption) TerminationStatus(chaosPods []corev1.Pod) TerminationStatus {
+	// a not yet created disruption is neither temporarily nor definitively ended
+	if r.CreationTimestamp.IsZero() {
+		return TSNotTerminated
+	}
+
+	// a definitive state (expired duration or deletion) implies a definitively deleted injection
+	// and should be returned prior to a temporarily terminated state
+	if r.RemainingDuration() <= 0 || !r.DeletionTimestamp.IsZero() {
+		return TSDefinitivelyTerminated
+	}
+
+	if len(chaosPods) == 0 {
+		// we were never injected, we are hence not terminated if we reach here
+		if r.Status.InjectionStatus.NeverInjected() {
+			return TSNotTerminated
+		}
+
+		// we were injected before, hence temporarily terminated
+		return TSTemporarilyTerminated
+	}
+
+	// if all pods exited successfully, we can consider the disruption already ended
+	// it can be caused either by an approximate date sync (hard to avoid in a distributed infra)
+	// or by deletion of targets leading to deletion of injectors
+	// injections terminated with an error are considered NOT terminated
+	for _, chaosPod := range chaosPods {
+		for _, containerStatuses := range chaosPod.Status.ContainerStatuses {
+			if containerStatuses.State.Terminated == nil || containerStatuses.State.Terminated.ExitCode != 0 {
+				return TSNotTerminated
+			}
+		}
+	}
+
+	// this MIGHT be a temporary status, that could become definitive once the disruption is expired or deleted
+	return TSTemporarilyTerminated
+}
+
+// GetTargetsCountAsInt returns a scaled value from the spec.Count IntOrString type. If the count
+// is a percentage string value, it's treated as a percentage and scaled appropriately
+// in accordance with the total; if it's an int value, it's treated as a simple value; and
+// if it is a string value which is either non-numeric or numeric but lacking a trailing '%', it returns an error.
+func (r *Disruption) GetTargetsCountAsInt(targetTotal int, roundUp bool) (int, error) {
+	if r.Spec.Count == nil {
+		return 0, apierrors.NewBadRequest("nil value for IntOrString")
+	}
+
+	return intstr.GetScaledValueFromIntOrPercent(r.Spec.Count, targetTotal, roundUp)
+}
+
 // +kubebuilder:object:root=true
 
 // DisruptionList contains a list of Disruption
diff --git a/api/v1beta1/disruption_types_test.go b/api/v1beta1/disruption_types_test.go
index 550ce919b..95a78ae75 100644
--- a/api/v1beta1/disruption_types_test.go
+++ b/api/v1beta1/disruption_types_test.go
@@ -7,13 +7,17 @@ package v1beta1_test
 
 import (
 	"sort"
+	"time"
 
 	. "github.com/DataDog/chaos-controller/api/v1beta1"
+	builderstest "github.com/DataDog/chaos-controller/builderstest"
+	chaostypes "github.com/DataDog/chaos-controller/types"
 	. "github.com/onsi/ginkgo/v2"
 	. 
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" + "k8s.io/apimachinery/pkg/util/intstr" ) var _ = Describe("TargetInjections", func() { @@ -45,7 +49,7 @@ var _ = Describe("TargetInjections", func() { }) var _ = Describe("AdvancedSelectorsToRequirements", func() { - Context("valid advancedselectors", func() { + Context("valid advanced selectors", func() { It("should return valid requirements", func() { advancedSelectors := []metav1.LabelSelectorRequirement{ { @@ -137,3 +141,312 @@ var _ = Describe("Check if a target exist into DisruptionStatus targets list", f }) }) }) + +var _ = Describe("Disruption", func() { + + var ( + defaultCreationTimestamp = time.Now() + notBeforeTime = defaultCreationTimestamp.Add(time.Minute) + ) + + DescribeTable("TimeToInject", func(disruptionBuilder *builderstest.DisruptionBuilder, expectedTime time.Time) { + // Arrange + disruption := disruptionBuilder.WithCreationTime(defaultCreationTimestamp).Build() + + // Action && Assert + Expect(disruption.TimeToInject()).To(Equal(expectedTime)) + }, + Entry( + "should return creationTimestamp if triggers is nil", + builderstest.NewDisruptionBuilder(), defaultCreationTimestamp), + Entry( + "should return triggers.createPods if triggers.inject is nil", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + CreatePods: DisruptionTrigger{ + NotBefore: metav1.NewTime(notBeforeTime), + Offset: "", + }, + }), notBeforeTime), + Entry( + "should return inject.notBefore if set", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + Inject: DisruptionTrigger{ + NotBefore: metav1.NewTime(notBeforeTime), + Offset: "", + }, + CreatePods: DisruptionTrigger{ + NotBefore: metav1.Time{}, + Offset: "2m", + }, + }), notBeforeTime), + Entry( + "should return a time after creationTimestamp if inject.offset is set", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + Inject: DisruptionTrigger{ + NotBefore: metav1.Time{}, + Offset: "1m", + }, + }), notBeforeTime), + Entry( + "should return creationTimestamp if inject.NotBefore is before creationTimestamp", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + CreatePods: DisruptionTrigger{ + NotBefore: metav1.NewTime(defaultCreationTimestamp.Add(-time.Minute)), + }, + }), defaultCreationTimestamp), + Entry( + "should return creationTimestamp + 5 minutes if createPods.offset is set", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + CreatePods: DisruptionTrigger{ + NotBefore: metav1.Time{}, + Offset: "5m", + }, + }), defaultCreationTimestamp.Add(time.Minute*5)), + Entry( + "should return creationTimestamp + 5 minutes if createPods.NotBefore is before creationTimestamp", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + CreatePods: DisruptionTrigger{ + NotBefore: metav1.NewTime(defaultCreationTimestamp.Add(-time.Minute * 5)), + }, + }), defaultCreationTimestamp), + ) + + DescribeTable("TimeToCreatePods", func(disruptionBuilder *builderstest.DisruptionBuilder, expectedTime time.Time) { + // Arrange + disruption := disruptionBuilder.WithCreationTime(defaultCreationTimestamp).Build() + + // Action && Assert + Expect(disruption.TimeToCreatePods()).To(Equal(expectedTime)) + }, + Entry( + "should return creationTimestamp if triggers is nil", + builderstest.NewDisruptionBuilder(), + defaultCreationTimestamp), + Entry( + "should 
return creationTimestamp if triggers.createPods is nil", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + Inject: DisruptionTrigger{ + Offset: "15m", + }, + }), + defaultCreationTimestamp), + Entry( + "should return createPods.notBefore if set", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + Inject: DisruptionTrigger{ + Offset: "15m", + }, + CreatePods: DisruptionTrigger{ + NotBefore: metav1.NewTime(notBeforeTime), + Offset: "", + }, + }), + notBeforeTime), + Entry( + "should return a time after creationTimestamp if createPods.offset is set", + builderstest.NewDisruptionBuilder().WithDisruptionTriggers(DisruptionTriggers{ + CreatePods: DisruptionTrigger{ + NotBefore: metav1.Time{}, + Offset: "5m", + }, + }), + defaultCreationTimestamp.Add(time.Minute*5)), + ) + + DescribeTable("TerminationStatus", func(disruptionBuilder *builderstest.DisruptionBuilder, pods builderstest.PodsBuilder, expectedTerminationStatus TerminationStatus) { + // Arrange + disruption := disruptionBuilder.Build() + + // Action && Assert + Expect(disruption.TerminationStatus(pods.Build())).To(Equal(expectedTerminationStatus)) + }, + Entry( + "not yet created disruption IS NOT terminated", + builderstest.NewDisruptionBuilder().Reset(), + nil, + TSNotTerminated), + Entry( + "1s before deadline, disruption IS NOT terminated", + builderstest.NewDisruptionBuilder().WithCreationDuration(time.Minute-time.Second), + builderstest.NewPodsBuilder(), + TSNotTerminated), + Entry( + "1s after deadline, disruption IS definitively terminated", + builderstest.NewDisruptionBuilder().WithCreationDuration(time.Minute+time.Second), + builderstest.NewPodsBuilder(), + TSDefinitivelyTerminated), + Entry( + "half duration disruption IS NOT terminated", + builderstest.NewDisruptionBuilder(), + builderstest.NewPodsBuilder(), + TSNotTerminated), + Entry( + "at deadline, disruption IS definitively terminated (however even ns before it is not)", + builderstest.NewDisruptionBuilder().WithCreationDuration(time.Minute), + builderstest.NewPodsBuilder(), + TSDefinitivelyTerminated), + Entry( + "deleted disruption IS definitively terminated", + builderstest.NewDisruptionBuilder().WithCreationDuration(time.Minute).WithDeletion(), + builderstest.NewPodsBuilder(), + TSDefinitivelyTerminated), + Entry( + "one chaos pod exited out of two IS NOT terminated", + builderstest.NewDisruptionBuilder(), + builderstest.NewPodsBuilder().One().Terminated().Parent(), + TSNotTerminated), + Entry( + "all chaos pods exited IS temporarily terminated", + builderstest.NewDisruptionBuilder(), + builderstest.NewPodsBuilder().One().Terminated().Parent().Two().Terminated().Parent(), + TSTemporarilyTerminated), + Entry( + "no pod injected is temporarily terminated", + builderstest.NewDisruptionBuilder().WithInjectionStatus(chaostypes.DisruptionInjectionStatusInjected), + nil, + TSTemporarilyTerminated), + Entry( + "no pod partially injected is temporarily terminated", + builderstest.NewDisruptionBuilder().WithInjectionStatus(chaostypes.DisruptionInjectionStatusPartiallyInjected), + nil, + TSTemporarilyTerminated), + Entry( + "no pod NOT injected is not terminated", + builderstest.NewDisruptionBuilder().WithInjectionStatus(chaostypes.DisruptionInjectionStatusNotInjected), + nil, + TSNotTerminated), + Entry( + "no pod initial injection status is not terminated", + builderstest.NewDisruptionBuilder(), + nil, + TSNotTerminated), + ) + + DescribeTable("RemainingDuration", func(disruptionBuilder 
*builderstest.DisruptionBuilder, expectedRemainingDuration time.Duration) {
+		// Arrange
+		disruption := disruptionBuilder.Build()
+
+		// Action && Assert
+		remainingDuration := disruption.RemainingDuration().Round(time.Second).Truncate(2 * time.Second)
+		Expect(remainingDuration).To(Equal(expectedRemainingDuration))
+	},
+		Entry(
+			"should return 30 remaining duration seconds with a disruption created 30 seconds ago with a 1m duration",
+			builderstest.NewDisruptionBuilder().WithCreationDuration(30*time.Second).WithDuration("1m"),
+			30*time.Second),
+		Entry(
+			"should return 90 remaining duration seconds with a disruption created 30 seconds ago with a 2m duration",
+			builderstest.NewDisruptionBuilder().WithCreationDuration(30*time.Second).WithDuration("2m"),
+			90*time.Second),
+	)
+
+	Describe("GetTargetsCountAsInt", func() {
+
+		DescribeTable("success cases", func(disruptionBuilder *builderstest.DisruptionBuilder, inputTargetCount int, inputRoundUp bool, expectedTargetCount int) {
+			// Arrange
+			disruption := disruptionBuilder.Build()
+
+			// Action
+			disruptionTargetCount, err := disruption.GetTargetsCountAsInt(inputTargetCount, inputRoundUp)
+
+			// Assert
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(disruptionTargetCount).To(Equal(expectedTargetCount))
+		},
+			Entry(
+				"should return 1 target count with a disruption with a count set at 1 and a single target count with round up at false",
+				builderstest.NewDisruptionBuilder().WithCount(&intstr.IntOrString{
+					Type:   0,
+					IntVal: 1,
+					StrVal: "1",
+				}),
+				1,
+				false,
+				1,
+			),
+			Entry(
+				"should return 2 targets count with a disruption with a count set at 2 and a single target count with round up at false",
+				builderstest.NewDisruptionBuilder().WithCount(&intstr.IntOrString{
+					Type:   0,
+					IntVal: 2,
+					StrVal: "2",
+				}),
+				1,
+				false,
+				2,
+			),
+			Entry(
+				"should return 1 target count with a disruption with a count set at 100% and a single target count with round up at false",
+				builderstest.NewDisruptionBuilder().WithCount(&intstr.IntOrString{
+					Type:   1,
+					IntVal: 100,
+					StrVal: "100%",
+				}),
+				1,
+				false,
+				1,
+			),
+			Entry(
+				"should return 50 targets count with a disruption with a count set at 50% and 100 targets count with round up at false",
+				builderstest.NewDisruptionBuilder().WithCount(&intstr.IntOrString{
+					Type:   1,
+					IntVal: 50,
+					StrVal: "50%",
+				}),
+				100,
+				false,
+				50,
+			),
+			Entry(
+				"should return 52 targets count with a disruption with a count set at 51% and 101 targets count with round up at true",
+				builderstest.NewDisruptionBuilder().WithCount(&intstr.IntOrString{
+					Type:   1,
+					IntVal: 51,
+					StrVal: "51%",
+				}),
+				101,
+				true,
+				52,
+			),
+			Entry(
+				"should return 51 targets count with a disruption with a count set at 51% and 101 targets count with round up at false",
+				builderstest.NewDisruptionBuilder().WithCount(&intstr.IntOrString{
+					Type:   1,
+					IntVal: 51,
+					StrVal: "51%",
+				}),
+				101,
+				false,
+				51,
+			))
+
+		DescribeTable("error cases", func(disruptionBuilder *builderstest.DisruptionBuilder, inputTargetCount int, inputRoundUp bool, expectedErrorMessage string) {
+			disruption := disruptionBuilder.Build()
+
+			// Action
+			_, err := disruption.GetTargetsCountAsInt(inputTargetCount, inputRoundUp)
+
+			// Assert
+			Expect(err).Should(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring(expectedErrorMessage))
+		},
+			Entry(
+				"should return an error with a disruption without count",
+				builderstest.NewDisruptionBuilder(),
+				nil,
+				false,
+				"nil value for IntOrString",
+			),
+			Entry(
+				"should return an error with a disruption with an invalid count",
builderstest.NewDisruptionBuilder().WithCount(&intstr.IntOrString{ + Type: 2, + IntVal: 0, + StrVal: "", + }), + nil, + false, + "invalid value for IntOrString", + )) + }) +}) diff --git a/api/v1beta1/disruption_webhook.go b/api/v1beta1/disruption_webhook.go index f7017c40a..99f96be8b 100644 --- a/api/v1beta1/disruption_webhook.go +++ b/api/v1beta1/disruption_webhook.go @@ -47,7 +47,7 @@ var ( defaultClusterThreshold float64 handlerEnabled bool defaultDuration time.Duration - cloudServicesProvidersManager *cloudservice.CloudServicesProvidersManager + cloudServicesProvidersManager cloudservice.CloudServicesProvidersManager chaosNamespace string ddmarkClient ddmark.Client safemodeEnvironment string diff --git a/api/v1beta1/disruption_webhook_test.go b/api/v1beta1/disruption_webhook_test.go index 6b65528f6..e582f2562 100644 --- a/api/v1beta1/disruption_webhook_test.go +++ b/api/v1beta1/disruption_webhook_test.go @@ -137,7 +137,7 @@ var _ = Describe("Disruption", func() { // Assert By("return an error") Expect(err).Should(HaveOccurred()) - Expect(err.Error()).Should(Equal("the user info annotation is immutable")) + Expect(err).To(MatchError("the user info annotation is immutable")) }) }) When("the user info of the new disruption is empty too", func() { @@ -165,7 +165,7 @@ var _ = Describe("Disruption", func() { // Assert By("return an error") Expect(err).Should(HaveOccurred()) - Expect(err.Error()).Should(Equal("the user info annotation is immutable")) + Expect(err).To(MatchError("the user info annotation is immutable")) }) }) }) @@ -292,7 +292,7 @@ var _ = Describe("Disruption", func() { // Assert Expect(err).Should(HaveOccurred()) - Expect(err.Error()).Should(Equal("1 error occurred:\n\t* Spec: either selector or advancedSelector field must be set\n\n")) + Expect(err).To(MatchError("1 error occurred:\n\t* Spec: either selector or advancedSelector field must be set\n\n")) Expect(ddmarkMock.AssertNumberOfCalls(GinkgoT(), "ValidateStructMultierror", 0)).To(BeTrue()) }) }) @@ -305,7 +305,7 @@ var _ = Describe("Disruption", func() { err := invalidDisruption.ValidateCreate() Expect(err).Should(HaveOccurred()) - Expect(err.Error()).Should(Equal("1 error occurred:\n\t* Spec: unable to parse requirement: values[0][app]: Invalid value: \"demo-{nginx}\": a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 'MyValue', or 'my_value', or '12345', regex used for validation is '(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?')\n\n")) + Expect(err).To(MatchError("1 error occurred:\n\t* Spec: unable to parse requirement: values[0][app]: Invalid value: \"demo-{nginx}\": a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 'MyValue', or 'my_value', or '12345', regex used for validation is '(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?')\n\n")) Expect(ddmarkMock.AssertNumberOfCalls(GinkgoT(), "ValidateStructMultierror", 0)).To(BeTrue()) }) }) @@ -322,7 +322,7 @@ var _ = Describe("Disruption", func() { err := invalidDisruption.ValidateCreate() Expect(err).Should(HaveOccurred()) - Expect(err.Error()).Should(Equal("1 error occurred:\n\t* Spec: error parsing given advanced selector to requirements: values[0][app]: Invalid value: \"*nginx\": a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 
'MyValue', or 'my_value', or '12345', regex used for validation is '(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?')\n\n"))
+		Expect(err).To(MatchError("1 error occurred:\n\t* Spec: error parsing given advanced selector to requirements: values[0][app]: Invalid value: \"*nginx\": a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 'MyValue', or 'my_value', or '12345', regex used for validation is '(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?')\n\n"))
 			Expect(ddmarkMock.AssertNumberOfCalls(GinkgoT(), "ValidateStructMultierror", 0)).To(BeTrue())
 		})
 	})
diff --git a/api/v1beta1/network_disruption.go b/api/v1beta1/network_disruption.go
index fdddd3416..68f24aefd 100644
--- a/api/v1beta1/network_disruption.go
+++ b/api/v1beta1/network_disruption.go
@@ -12,6 +12,8 @@ import (
 	"strconv"
 	"strings"
 
+	"github.com/DataDog/chaos-controller/cloudservice"
+	"github.com/DataDog/chaos-controller/cloudservice/types"
 	"github.com/hashicorp/go-multierror"
 	v1 "k8s.io/api/core/v1"
 )
@@ -559,3 +561,36 @@ func (s NetworkDisruptionServiceSpec) ExtractAffectedPortsInServicePorts(k8sServ
 
 	return goodPorts, notFoundPorts
 }
+
+// TransformCloudSpecToHostsSpec gets all the IP ranges of the cloud provider services referenced in the given cloud spec disruption and transforms them into a list of host specs
+func TransformCloudSpecToHostsSpec(cloudManager cloudservice.CloudServicesProvidersManager, cloudSpec *NetworkDisruptionCloudSpec) ([]NetworkDisruptionHostSpec, error) {
+	var hosts []NetworkDisruptionHostSpec
+
+	clouds := cloudSpec.TransformToCloudMap()
+
+	for cloudName, serviceList := range clouds {
+		var serviceListNames []string
+
+		for _, service := range serviceList {
+			serviceListNames = append(serviceListNames, service.ServiceName)
+		}
+
+		ipRangesPerService, err := cloudManager.GetServicesIPRanges(types.CloudProviderName(cloudName), serviceListNames)
+		if err != nil {
+			return nil, err
+		}
+
+		for _, serviceSpec := range serviceList {
+			for _, ipRange := range ipRangesPerService[serviceSpec.ServiceName] {
+				hosts = append(hosts, NetworkDisruptionHostSpec{
+					Host:      ipRange,
+					Protocol:  serviceSpec.Protocol,
+					Flow:      serviceSpec.Flow,
+					ConnState: serviceSpec.ConnState,
+				})
+			}
+		}
+	}
+
+	return hosts, nil
+}
diff --git a/api/v1beta1/validations.go b/api/v1beta1/validations.go
index 2a30e750f..c284207d0 100644
--- a/api/v1beta1/validations.go
+++ b/api/v1beta1/validations.go
@@ -117,3 +117,11 @@ func ValidateCount(count *intstr.IntOrString) error {
 
 	return nil
 }
+
+// IsUpdateConflictError tells us if this error is of the form:
+// "Operation cannot be fulfilled on disruptions.chaos.datadoghq.com "chaos-network-drop": the object has been modified; please apply your changes to the latest version and try again"
+// Sadly this doesn't seem to be one of the errors checkable with a function from "k8s.io/apimachinery/pkg/api/errors",
+// so we parse the error message directly.
+func IsUpdateConflictError(err error) bool {
+	return strings.Contains(err.Error(), "please apply your changes to the latest version and try again")
+}
diff --git a/builderstest/chaospod.go b/builderstest/chaospod.go
new file mode 100644
index 000000000..9c915cc14
--- /dev/null
+++ b/builderstest/chaospod.go
@@ -0,0 +1,338 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2023 Datadog, Inc.
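+//
+// Package builderstest_test exposes fluent test builders (chaos pods,
+// disruptions, plain pods) whose modifiers are applied lazily at Build() time,
+// so that fields derived from time.Now() stay as fresh as possible.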
+
+package builderstest_test
+
+import (
+	"time"
+
+	"github.com/DataDog/chaos-controller/env"
+	"github.com/DataDog/chaos-controller/types"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// ChaosPodBuilder is a struct used to build a chaos pod instance.
+type ChaosPodBuilder struct {
+	*v1.Pod
+	// we store the actions we want to perform instead of performing them right away because they are time sensitive
+	// this enables us to ensure time.Now is as late as it can be without faking it (which we should do at some point)
+	modifiers []func()
+}
+
+// NewPodBuilder creates a new ChaosPodBuilder instance with an initial pod configuration.
+func NewPodBuilder(podName, namespace string) *ChaosPodBuilder {
+	return (&ChaosPodBuilder{
+		Pod: &v1.Pod{
+			TypeMeta: metav1.TypeMeta{
+				Kind: "pod",
+			},
+			ObjectMeta: metav1.ObjectMeta{
+				Name:              podName,
+				Namespace:         namespace,
+				CreationTimestamp: metav1.NewTime(time.Now()),
+				Labels: map[string]string{
+					"app": podName,
+				},
+			},
+		},
+	}).WithCreation(30 * time.Second)
+}
+
+// Build generates a v1.Pod instance based on the configuration.
+func (b *ChaosPodBuilder) Build() v1.Pod {
+	for _, modifier := range b.modifiers {
+		modifier()
+	}
+
+	return *b.Pod
+}
+
+// Reset resets the ChaosPodBuilder by clearing all modifiers.
+func (b *ChaosPodBuilder) Reset() *ChaosPodBuilder {
+	b.modifiers = nil
+
+	return b
+}
+
+// WithCreation sets the creation timestamp to the given duration in the past.
+func (b *ChaosPodBuilder) WithCreation(past time.Duration) *ChaosPodBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.CreationTimestamp = metav1.NewTime(time.Now().Add(-past))
+		})
+
+	return b
+}
+
+// WithDeletion sets the deletion timestamp to the current time.
+func (b *ChaosPodBuilder) WithDeletion() *ChaosPodBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			v1t := metav1.NewTime(time.Now())
+
+			b.DeletionTimestamp = &v1t
+		})
+
+	return b
+}
+
+// WithChaosPodLabels sets chaos-related labels.
+func (b *ChaosPodBuilder) WithChaosPodLabels(name, namespace, target, kind string) *ChaosPodBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Labels[types.DisruptionNameLabel] = name
+			b.Labels[types.DisruptionNamespaceLabel] = namespace
+			b.Labels[types.TargetLabel] = target
+			b.Labels[types.DisruptionKindLabel] = kind
+		})
+
+	return b
+}
+
+// WithLabels sets custom labels.
+func (b *ChaosPodBuilder) WithLabels(labels map[string]string) *ChaosPodBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			for key, value := range labels {
+				b.Labels[key] = value
+			}
+		})
+
+	return b
+}
+
+// WithStatusPhase sets the status phase.
+func (b *ChaosPodBuilder) WithStatusPhase(phase v1.PodPhase) *ChaosPodBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Status.Phase = phase
+		})
+
+	return b
+}
+
+// WithChaosFinalizer sets the ChaosPodFinalizer.
+func (b *ChaosPodBuilder) WithChaosFinalizer() *ChaosPodBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.SetFinalizers([]string{types.ChaosPodFinalizer})
+		})
+
+	return b
+}
+
+// WithStatus sets the status.
+func (b *ChaosPodBuilder) WithStatus(status v1.PodStatus) *ChaosPodBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Status = status
+		})
+
+	return b
+}
+
+// WithContainerStatuses sets the container statuses on the pod status.
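+// For example, marking the single container as cleanly exited:
+//
+//	WithContainerStatuses([]v1.ContainerStatus{
+//		{State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{ExitCode: 0}}},
+//	})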
+func (b *ChaosPodBuilder) WithContainerStatuses(statuses []v1.ContainerStatus) *ChaosPodBuilder { + b.modifiers = append( + b.modifiers, + func() { + b.Status.ContainerStatuses = statuses + }) + + return b +} + +// WithPullSecrets sets image pull secrets to the spec. +func (b *ChaosPodBuilder) WithPullSecrets(imagePullSecrets []v1.LocalObjectReference) *ChaosPodBuilder { + b.modifiers = append( + b.modifiers, + func() { + b.Spec.ImagePullSecrets = imagePullSecrets + }) + + return b +} + +// WithChaosSpec sets the chaos-specific pod spec. +func (b *ChaosPodBuilder) WithChaosSpec(targetNodeName string, terminationGracePeriod, activeDeadlineSeconds int64, args []string, hostPathDirectory, pathFile v1.HostPathType, serviceAccountName string, image string) *ChaosPodBuilder { + b.modifiers = append( + b.modifiers, + func() { + b.Spec = v1.PodSpec{ + HostPID: true, // enable host pid + RestartPolicy: v1.RestartPolicyNever, // do not restart the pod on fail or completion + NodeName: targetNodeName, // specify node name to schedule the pod + ServiceAccountName: serviceAccountName, // service account to use + TerminationGracePeriodSeconds: &terminationGracePeriod, + ActiveDeadlineSeconds: &activeDeadlineSeconds, + Containers: []v1.Container{ + { + Name: "injector", // container name + Image: image, // container image gathered from controller flags + ImagePullPolicy: v1.PullIfNotPresent, // pull the image only when it is not present + Args: args, // pass disruption arguments + SecurityContext: &v1.SecurityContext{ + Privileged: func() *bool { b := true; return &b }(), // enable privileged mode + }, + ReadinessProbe: &v1.Probe{ // define readiness probe (file created by the injector when the injection is successful) + PeriodSeconds: 1, + FailureThreshold: 5, + ProbeHandler: v1.ProbeHandler{ + Exec: &v1.ExecAction{ + Command: []string{"test", "-f", "/tmp/readiness_probe"}, + }, + }, + }, + Resources: v1.ResourceRequirements{ // set resources requests and limits to zero + Limits: v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(0, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: *resource.NewQuantity(0, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI), + }, + }, + Env: []v1.EnvVar{ // define environment variables + { + Name: env.InjectorTargetPodHostIP, + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "status.hostIP", + }, + }, + }, + { + Name: env.InjectorChaosPodIP, + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "status.podIP", + }, + }, + }, + { + Name: env.InjectorPodName, + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: env.InjectorMountHost, + Value: "/mnt/host/", + }, + { + Name: env.InjectorMountProc, + Value: "/mnt/host/proc/", + }, + { + Name: env.InjectorMountSysrq, + Value: "/mnt/sysrq", + }, + { + Name: env.InjectorMountSysrqTrigger, + Value: "/mnt/sysrq-trigger", + }, + { + Name: env.InjectorMountCgroup, + Value: "/mnt/cgroup/", + }, + }, + VolumeMounts: []v1.VolumeMount{ // define volume mounts required for disruptions to work + { + Name: "run", + MountPath: "/run", + }, + { + Name: "sysrq", + MountPath: "/mnt/sysrq", + }, + { + Name: "sysrq-trigger", + MountPath: "/mnt/sysrq-trigger", + }, + { + Name: "cgroup", + MountPath: "/mnt/cgroup", + }, + { + Name: "host", + MountPath: "/mnt/host", + ReadOnly: true, + }, + 
}, + }, + }, + Volumes: []v1.Volume{ // declare volumes required for disruptions to work + { + Name: "run", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/run", + Type: &hostPathDirectory, + }, + }, + }, + { + Name: "proc", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/proc", + Type: &hostPathDirectory, + }, + }, + }, + { + Name: "sysrq", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/proc/sys/kernel/sysrq", + Type: &pathFile, + }, + }, + }, + { + Name: "sysrq-trigger", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/proc/sysrq-trigger", + Type: &pathFile, + }, + }, + }, + { + Name: "cgroup", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/sys/fs/cgroup", + Type: &hostPathDirectory, + }, + }, + }, + { + Name: "host", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/", + Type: &hostPathDirectory, + }, + }, + }, + }, + } + }) + + return b +} diff --git a/builderstest/disruption.go b/builderstest/disruption.go new file mode 100644 index 000000000..1fd79d506 --- /dev/null +++ b/builderstest/disruption.go @@ -0,0 +1,236 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2023 Datadog, Inc. + +package builderstest_test + +import ( + "time" + + "github.com/DataDog/chaos-controller/api/v1beta1" + "github.com/DataDog/chaos-controller/types" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +// DisruptionBuilder is a struct used to build a disruption instance. +type DisruptionBuilder struct { + *v1beta1.Disruption + // we store action we want to perform instead of performing them right away because they are time sensitive + // this enables us to ensure time.Now is as late as it can be without faking it (that we should do at some point) + modifiers []func() +} + +// NewDisruptionBuilder creates a new DisruptionBuilder instance with an initial disruption spec and a creation timestamp modifier. +func NewDisruptionBuilder() *DisruptionBuilder { + return (&DisruptionBuilder{ + Disruption: &v1beta1.Disruption{ + Spec: v1beta1.DisruptionSpec{ + Duration: "1m", // per spec definition a valid disruption going to the reconcile loop MUST have a duration, let's not test wrong test cases + }, + }, + }).WithCreationDuration(30 * time.Second) +} + +// WithDisruptionTriggers sets the specified triggers of disruption. +func (b *DisruptionBuilder) WithDisruptionTriggers(triggers v1beta1.DisruptionTriggers) *DisruptionBuilder { + b.modifiers = append( + b.modifiers, + func() { + b.Spec.Triggers = triggers + }) + + return b +} + +// WithCount sets the specified count. +func (b *DisruptionBuilder) WithCount(count *intstr.IntOrString) *DisruptionBuilder { + b.modifiers = append( + b.modifiers, + func() { + b.Spec.Count = count + }) + + return b +} + +// WithDisruptionKind sets the specified kind of disruption in the DisruptionBuilder's spec. 
+func (b *DisruptionBuilder) WithDisruptionKind(kind types.DisruptionKindName) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			switch kind {
+			case types.DisruptionKindNodeFailure:
+				if b.Spec.NodeFailure == nil {
+					b.Spec.NodeFailure = &v1beta1.NodeFailureSpec{}
+				}
+			case types.DisruptionKindContainerFailure:
+				if b.Spec.ContainerFailure == nil {
+					b.Spec.ContainerFailure = &v1beta1.ContainerFailureSpec{}
+				}
+			case types.DisruptionKindNetworkDisruption:
+				if b.Spec.Network == nil {
+					b.Spec.Network = &v1beta1.NetworkDisruptionSpec{}
+				}
+			case types.DisruptionKindCPUPressure:
+				if b.Spec.CPUPressure == nil {
+					b.Spec.CPUPressure = &v1beta1.CPUPressureSpec{}
+				}
+			case types.DisruptionKindDiskPressure:
+				if b.Spec.DiskPressure == nil {
+					b.Spec.DiskPressure = &v1beta1.DiskPressureSpec{}
+				}
+			case types.DisruptionKindDNSDisruption:
+				if b.Spec.DNS == nil {
+					b.Spec.DNS = v1beta1.DNSDisruptionSpec{}
+				}
+			case types.DisruptionKindGRPCDisruption:
+				if b.Spec.GRPC == nil {
+					b.Spec.GRPC = &v1beta1.GRPCDisruptionSpec{}
+				}
+			case types.DisruptionKindDiskFailure:
+				if b.Spec.DiskFailure == nil {
+					b.Spec.DiskFailure = &v1beta1.DiskFailureSpec{}
+				}
+			}
+		})
+
+	return b
+}
+
+// Build generates a v1beta1.Disruption instance based on the configuration.
+func (b *DisruptionBuilder) Build() v1beta1.Disruption {
+	for _, modifier := range b.modifiers {
+		modifier()
+	}
+
+	return *b.Disruption
+}
+
+// Reset resets the DisruptionBuilder by clearing all modifiers.
+func (b *DisruptionBuilder) Reset() *DisruptionBuilder {
+	b.modifiers = nil
+
+	return b
+}
+
+// WithCreationDuration sets the creation timestamp to the given duration in the past.
+func (b *DisruptionBuilder) WithCreationDuration(past time.Duration) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.CreationTimestamp = v1.NewTime(time.Now().Add(-past))
+		})
+
+	return b
+}
+
+// WithCreationTime sets the creation timestamp to the given time.
+func (b *DisruptionBuilder) WithCreationTime(creationTimestamp time.Time) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.CreationTimestamp = v1.NewTime(creationTimestamp)
+		})
+
+	return b
+}
+
+// WithDuration sets the disruption duration.
+func (b *DisruptionBuilder) WithDuration(duration v1beta1.DisruptionDuration) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Spec.Duration = duration
+		})
+
+	return b
+}
+
+// WithDeletion sets the deletion timestamp to the current time.
+func (b *DisruptionBuilder) WithDeletion() *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			v1t := v1.NewTime(time.Now())
+
+			b.DeletionTimestamp = &v1t
+		})
+
+	return b
+}
+
+// WithNamespace sets the namespace.
+func (b *DisruptionBuilder) WithNamespace(namespace string) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Namespace = namespace
+		})
+
+	return b
+}
+
+// WithName sets the name.
+func (b *DisruptionBuilder) WithName(name string) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Name = name
+		})
+
+	return b
+}
+
+// WithNetworkDisruptionCloudSpec sets the given NetworkDisruptionCloudSpec on the network spec.
+func (b *DisruptionBuilder) WithNetworkDisruptionCloudSpec(spec *v1beta1.NetworkDisruptionCloudSpec) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			if b.Spec.Network == nil {
+				b.Spec.Network = &v1beta1.NetworkDisruptionSpec{}
+			}
+
+			b.Spec.Network.Cloud = spec
+		})
+
+	return b
+}
+
+// WithSpecPulse sets the given DisruptionPulse on the spec.
+func (b *DisruptionBuilder) WithSpecPulse(specPulse *v1beta1.DisruptionPulse) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Spec.Pulse = specPulse
+		})
+
+	return b
+}
+
+// WithNetworkDisableDefaultAllowedHosts sets DisableDefaultAllowedHosts on the network spec.
+func (b *DisruptionBuilder) WithNetworkDisableDefaultAllowedHosts(enable bool) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			if b.Spec.Network == nil {
+				b.Spec.Network = &v1beta1.NetworkDisruptionSpec{}
+			}
+
+			b.Spec.Network.DisableDefaultAllowedHosts = enable
+		})
+
+	return b
+}
+
+// WithInjectionStatus sets the specified injection status in the DisruptionBuilder's status.
+func (b *DisruptionBuilder) WithInjectionStatus(status types.DisruptionInjectionStatus) *DisruptionBuilder {
+	b.modifiers = append(
+		b.modifiers,
+		func() {
+			b.Status.InjectionStatus = status
+		})
+
+	return b
+}
diff --git a/builderstest/pod.go b/builderstest/pod.go
new file mode 100644
index 000000000..c4efcca3e
--- /dev/null
+++ b/builderstest/pod.go
@@ -0,0 +1,100 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2023 Datadog, Inc.
+
+package builderstest_test
+
+import v1 "k8s.io/api/core/v1"
+
+// PodsBuilder is a list of PodBuilder.
+type PodsBuilder []*PodBuilder
+
+// PodBuilder is a struct used for building v1.Pod instances with modifications.
+type PodBuilder struct {
+	*v1.Pod             // The built v1.Pod instance
+	parent  PodsBuilder // The parent PodsBuilder instance associated with this PodBuilder
+}
+
+// NewPodsBuilder creates a new PodsBuilder instance with two predefined pods.
+func NewPodsBuilder() PodsBuilder {
+	return PodsBuilder{
+		{
+			Pod: &v1.Pod{
+				Status: v1.PodStatus{
+					ContainerStatuses: []v1.ContainerStatus{
+						{
+							State: v1.ContainerState{},
+						},
+					},
+				},
+			},
+		},
+		{
+			Pod: &v1.Pod{
+				Status: v1.PodStatus{
+					ContainerStatuses: []v1.ContainerStatus{
+						{
+							State: v1.ContainerState{},
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+// Build constructs and returns a slice of v1.Pod based on the configuration set in the PodsBuilder.
+func (p PodsBuilder) Build() []v1.Pod {
+	if p == nil {
+		return nil
+	}
+
+	pods := make([]v1.Pod, 0, len(p))
+
+	for _, pod := range p {
+		pods = append(pods, *pod.Pod)
+	}
+
+	return pods
+}
+
+// Take returns a pointer to a PodBuilder for the specified index from the PodsBuilder.
+func (p PodsBuilder) Take(index int) *PodBuilder {
+	// Check if the parent of the PodBuilder at the specified index is uninitialized (nil).
+	// If uninitialized, set the parent of the PodBuilder to the current PodsBuilder.
+	if p[index].parent == nil {
+		p[index].parent = p
+	}
+
+	return p[index]
+}
+
+// One returns a pointer to a PodBuilder for the first pod in the PodsBuilder.
+func (p PodsBuilder) One() *PodBuilder {
+	return p.Take(0)
+}
+
+// Two returns a pointer to a PodBuilder for the second pod in the PodsBuilder.
+func (p PodsBuilder) Two() *PodBuilder {
+	return p.Take(1)
+}
+
+// Parent returns the parent PodsBuilder associated with the PodBuilder.
+func (p *PodBuilder) Parent() PodsBuilder {
+	return p.parent
+}
+
+// TerminatedWith sets the termination state of the pod's first container to terminated with the specified exit code.
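+// For example, NewPodsBuilder().One().TerminatedWith(1).Parent().Build()
+// yields two pods whose first pod has a container terminated with exit code 1.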
+func (p *PodBuilder) TerminatedWith(exitCode int32) *PodBuilder { + p.Pod.Status.ContainerStatuses[0].State.Terminated = &v1.ContainerStateTerminated{ + ExitCode: exitCode, + } + + return p +} + +// Terminated sets the termination state of the container in the Pod to a terminated state with exit code 0. +func (p *PodBuilder) Terminated() *PodBuilder { + return p.TerminatedWith(0) +} diff --git a/cloudservice/cloud_services_providers_manager_mock.go b/cloudservice/cloud_services_providers_manager_mock.go new file mode 100644 index 000000000..1adad0b1b --- /dev/null +++ b/cloudservice/cloud_services_providers_manager_mock.go @@ -0,0 +1,288 @@ +// Code generated by mockery. DO NOT EDIT. + +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2023 Datadog, Inc. +package cloudservice + +import ( + types "github.com/DataDog/chaos-controller/cloudservice/types" + mock "github.com/stretchr/testify/mock" +) + +// CloudServicesProvidersManagerMock is an autogenerated mock type for the CloudServicesProvidersManager type +type CloudServicesProvidersManagerMock struct { + mock.Mock +} + +type CloudServicesProvidersManagerMock_Expecter struct { + mock *mock.Mock +} + +func (_m *CloudServicesProvidersManagerMock) EXPECT() *CloudServicesProvidersManagerMock_Expecter { + return &CloudServicesProvidersManagerMock_Expecter{mock: &_m.Mock} +} + +// GetProviderByName provides a mock function with given fields: name +func (_m *CloudServicesProvidersManagerMock) GetProviderByName(name types.CloudProviderName) *CloudServicesProvider { + ret := _m.Called(name) + + var r0 *CloudServicesProvider + if rf, ok := ret.Get(0).(func(types.CloudProviderName) *CloudServicesProvider); ok { + r0 = rf(name) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*CloudServicesProvider) + } + } + + return r0 +} + +// CloudServicesProvidersManagerMock_GetProviderByName_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetProviderByName' +type CloudServicesProvidersManagerMock_GetProviderByName_Call struct { + *mock.Call +} + +// GetProviderByName is a helper method to define mock.On call +// - name types.CloudProviderName +func (_e *CloudServicesProvidersManagerMock_Expecter) GetProviderByName(name interface{}) *CloudServicesProvidersManagerMock_GetProviderByName_Call { + return &CloudServicesProvidersManagerMock_GetProviderByName_Call{Call: _e.mock.On("GetProviderByName", name)} +} + +func (_c *CloudServicesProvidersManagerMock_GetProviderByName_Call) Run(run func(name types.CloudProviderName)) *CloudServicesProvidersManagerMock_GetProviderByName_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(types.CloudProviderName)) + }) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_GetProviderByName_Call) Return(_a0 *CloudServicesProvider) *CloudServicesProvidersManagerMock_GetProviderByName_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_GetProviderByName_Call) RunAndReturn(run func(types.CloudProviderName) *CloudServicesProvider) *CloudServicesProvidersManagerMock_GetProviderByName_Call { + _c.Call.Return(run) + return _c +} + +// GetServiceList provides a mock function with given fields: cloudProviderName +func (_m *CloudServicesProvidersManagerMock) GetServiceList(cloudProviderName types.CloudProviderName) []string { + ret := _m.Called(cloudProviderName) + 
+ var r0 []string + if rf, ok := ret.Get(0).(func(types.CloudProviderName) []string); ok { + r0 = rf(cloudProviderName) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]string) + } + } + + return r0 +} + +// CloudServicesProvidersManagerMock_GetServiceList_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetServiceList' +type CloudServicesProvidersManagerMock_GetServiceList_Call struct { + *mock.Call +} + +// GetServiceList is a helper method to define mock.On call +// - cloudProviderName types.CloudProviderName +func (_e *CloudServicesProvidersManagerMock_Expecter) GetServiceList(cloudProviderName interface{}) *CloudServicesProvidersManagerMock_GetServiceList_Call { + return &CloudServicesProvidersManagerMock_GetServiceList_Call{Call: _e.mock.On("GetServiceList", cloudProviderName)} +} + +func (_c *CloudServicesProvidersManagerMock_GetServiceList_Call) Run(run func(cloudProviderName types.CloudProviderName)) *CloudServicesProvidersManagerMock_GetServiceList_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(types.CloudProviderName)) + }) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_GetServiceList_Call) Return(_a0 []string) *CloudServicesProvidersManagerMock_GetServiceList_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_GetServiceList_Call) RunAndReturn(run func(types.CloudProviderName) []string) *CloudServicesProvidersManagerMock_GetServiceList_Call { + _c.Call.Return(run) + return _c +} + +// GetServicesIPRanges provides a mock function with given fields: cloudProviderName, serviceNames +func (_m *CloudServicesProvidersManagerMock) GetServicesIPRanges(cloudProviderName types.CloudProviderName, serviceNames []string) (map[string][]string, error) { + ret := _m.Called(cloudProviderName, serviceNames) + + var r0 map[string][]string + var r1 error + if rf, ok := ret.Get(0).(func(types.CloudProviderName, []string) (map[string][]string, error)); ok { + return rf(cloudProviderName, serviceNames) + } + if rf, ok := ret.Get(0).(func(types.CloudProviderName, []string) map[string][]string); ok { + r0 = rf(cloudProviderName, serviceNames) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(map[string][]string) + } + } + + if rf, ok := ret.Get(1).(func(types.CloudProviderName, []string) error); ok { + r1 = rf(cloudProviderName, serviceNames) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CloudServicesProvidersManagerMock_GetServicesIPRanges_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetServicesIPRanges' +type CloudServicesProvidersManagerMock_GetServicesIPRanges_Call struct { + *mock.Call +} + +// GetServicesIPRanges is a helper method to define mock.On call +// - cloudProviderName types.CloudProviderName +// - serviceNames []string +func (_e *CloudServicesProvidersManagerMock_Expecter) GetServicesIPRanges(cloudProviderName interface{}, serviceNames interface{}) *CloudServicesProvidersManagerMock_GetServicesIPRanges_Call { + return &CloudServicesProvidersManagerMock_GetServicesIPRanges_Call{Call: _e.mock.On("GetServicesIPRanges", cloudProviderName, serviceNames)} +} + +func (_c *CloudServicesProvidersManagerMock_GetServicesIPRanges_Call) Run(run func(cloudProviderName types.CloudProviderName, serviceNames []string)) *CloudServicesProvidersManagerMock_GetServicesIPRanges_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(types.CloudProviderName), args[1].([]string)) + }) + return _c +} + +func (_c 
*CloudServicesProvidersManagerMock_GetServicesIPRanges_Call) Return(_a0 map[string][]string, _a1 error) *CloudServicesProvidersManagerMock_GetServicesIPRanges_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_GetServicesIPRanges_Call) RunAndReturn(run func(types.CloudProviderName, []string) (map[string][]string, error)) *CloudServicesProvidersManagerMock_GetServicesIPRanges_Call { + _c.Call.Return(run) + return _c +} + +// PullIPRanges provides a mock function with given fields: +func (_m *CloudServicesProvidersManagerMock) PullIPRanges() error { + ret := _m.Called() + + var r0 error + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// CloudServicesProvidersManagerMock_PullIPRanges_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'PullIPRanges' +type CloudServicesProvidersManagerMock_PullIPRanges_Call struct { + *mock.Call +} + +// PullIPRanges is a helper method to define mock.On call +func (_e *CloudServicesProvidersManagerMock_Expecter) PullIPRanges() *CloudServicesProvidersManagerMock_PullIPRanges_Call { + return &CloudServicesProvidersManagerMock_PullIPRanges_Call{Call: _e.mock.On("PullIPRanges")} +} + +func (_c *CloudServicesProvidersManagerMock_PullIPRanges_Call) Run(run func()) *CloudServicesProvidersManagerMock_PullIPRanges_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_PullIPRanges_Call) Return(_a0 error) *CloudServicesProvidersManagerMock_PullIPRanges_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_PullIPRanges_Call) RunAndReturn(run func() error) *CloudServicesProvidersManagerMock_PullIPRanges_Call { + _c.Call.Return(run) + return _c +} + +// StartPeriodicPull provides a mock function with given fields: +func (_m *CloudServicesProvidersManagerMock) StartPeriodicPull() { + _m.Called() +} + +// CloudServicesProvidersManagerMock_StartPeriodicPull_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StartPeriodicPull' +type CloudServicesProvidersManagerMock_StartPeriodicPull_Call struct { + *mock.Call +} + +// StartPeriodicPull is a helper method to define mock.On call +func (_e *CloudServicesProvidersManagerMock_Expecter) StartPeriodicPull() *CloudServicesProvidersManagerMock_StartPeriodicPull_Call { + return &CloudServicesProvidersManagerMock_StartPeriodicPull_Call{Call: _e.mock.On("StartPeriodicPull")} +} + +func (_c *CloudServicesProvidersManagerMock_StartPeriodicPull_Call) Run(run func()) *CloudServicesProvidersManagerMock_StartPeriodicPull_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_StartPeriodicPull_Call) Return() *CloudServicesProvidersManagerMock_StartPeriodicPull_Call { + _c.Call.Return() + return _c +} + +func (_c *CloudServicesProvidersManagerMock_StartPeriodicPull_Call) RunAndReturn(run func()) *CloudServicesProvidersManagerMock_StartPeriodicPull_Call { + _c.Call.Return(run) + return _c +} + +// StopPeriodicPull provides a mock function with given fields: +func (_m *CloudServicesProvidersManagerMock) StopPeriodicPull() { + _m.Called() +} + +// CloudServicesProvidersManagerMock_StopPeriodicPull_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StopPeriodicPull' +type CloudServicesProvidersManagerMock_StopPeriodicPull_Call struct { + *mock.Call 
+} + +// StopPeriodicPull is a helper method to define mock.On call +func (_e *CloudServicesProvidersManagerMock_Expecter) StopPeriodicPull() *CloudServicesProvidersManagerMock_StopPeriodicPull_Call { + return &CloudServicesProvidersManagerMock_StopPeriodicPull_Call{Call: _e.mock.On("StopPeriodicPull")} +} + +func (_c *CloudServicesProvidersManagerMock_StopPeriodicPull_Call) Run(run func()) *CloudServicesProvidersManagerMock_StopPeriodicPull_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *CloudServicesProvidersManagerMock_StopPeriodicPull_Call) Return() *CloudServicesProvidersManagerMock_StopPeriodicPull_Call { + _c.Call.Return() + return _c +} + +func (_c *CloudServicesProvidersManagerMock_StopPeriodicPull_Call) RunAndReturn(run func()) *CloudServicesProvidersManagerMock_StopPeriodicPull_Call { + _c.Call.Return(run) + return _c +} + +type mockConstructorTestingTNewCloudServicesProvidersManagerMock interface { + mock.TestingT + Cleanup(func()) +} + +// NewCloudServicesProvidersManagerMock creates a new instance of CloudServicesProvidersManagerMock. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewCloudServicesProvidersManagerMock(t mockConstructorTestingTNewCloudServicesProvidersManagerMock) *CloudServicesProvidersManagerMock { + mock := &CloudServicesProvidersManagerMock{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/cloudservice/manager.go b/cloudservice/manager.go index c2c599b30..fc709aa50 100644 --- a/cloudservice/manager.go +++ b/cloudservice/manager.go @@ -20,34 +20,75 @@ import ( "go.uber.org/zap" ) -// CloudServicesProvidersManager Manager used to pull and parse any provider ip ranges per service -type CloudServicesProvidersManager struct { +// CloudServicesProvidersManager represents an interface for managing cloud service providers and their IP ranges. +type CloudServicesProvidersManager interface { + // GetServiceList returns a list of service names provided by the specified cloud provider. + GetServiceList(cloudProviderName types.CloudProviderName) []string + + // GetServicesIPRanges retrieves IP ranges for the specified services provided by the given cloud provider. + GetServicesIPRanges(cloudProviderName types.CloudProviderName, serviceNames []string) (map[string][]string, error) + + // PullIPRanges triggers the manual pulling of IP ranges for all cloud providers. + PullIPRanges() error + + // StartPeriodicPull starts the periodic process of pulling IP ranges from cloud providers. + StartPeriodicPull() + + // StopPeriodicPull stops the periodic process of pulling IP ranges from cloud providers. + StopPeriodicPull() + + // GetProviderByName retrieves the cloud services provider instance by its name. + GetProviderByName(name types.CloudProviderName) *CloudServicesProvider +} + +type cloudServicesProvidersManager struct { cloudProviders map[types.CloudProviderName]*CloudServicesProvider log *zap.SugaredLogger stopPeriodicPull chan bool periodicPullInterval time.Duration + client *http.Client } // CloudServicesProvider Data and ip ranges manager of one cloud provider type CloudServicesProvider struct { + // CloudProviderIPRangeManager is responsible for managing IP ranges for the cloud provider. 
CloudProviderIPRangeManager CloudProviderIPRangeManager
-	IPRangeInfo                 *types.CloudProviderIPRangeInfo
-	Conf                        types.CloudProviderConfig
+
+	// IPRangeInfo stores information about the IP ranges of the cloud services provided by the cloud provider.
+	IPRangeInfo *types.CloudProviderIPRangeInfo
+
+	// Conf contains the configuration settings for the cloud services provider.
+	Conf types.CloudProviderConfig
 }
 
 // CloudProviderIPRangeManager Methods to verify and transform a specified ip ranges list from a provider
 type CloudProviderIPRangeManager interface {
-	IsNewVersion([]byte, string) (bool, error)
-	ConvertToGenericIPRanges([]byte) (*types.CloudProviderIPRangeInfo, error)
+	// IsNewVersion checks whether a given IP range data in the form of bytes is a new version compared to a given version string.
+	// It returns true if the data is a new version, otherwise false. An error is returned in case of any issues.
+	IsNewVersion(ipRangeData []byte, version string) (bool, error)
+
+	// ConvertToGenericIPRanges converts the given IP range data in the form of bytes to a generic CloudProviderIPRangeInfo structure.
+	// It returns the converted IP range information or an error in case of any issues during conversion.
+	ConvertToGenericIPRanges(ipRangeData []byte) (*types.CloudProviderIPRangeInfo, error)
 }
 
-func New(log *zap.SugaredLogger, config types.CloudProviderConfigs) (*CloudServicesProvidersManager, error) {
-	manager := &CloudServicesProvidersManager{
+// New creates a new instance of CloudServicesProvidersManager.
+// It initializes the manager with cloud providers based on the configuration and sets up their IP range managers.
+func New(log *zap.SugaredLogger, config types.CloudProviderConfigs, httpClientMock *http.Client) (CloudServicesProvidersManager, error) {
+	manager := &cloudServicesProvidersManager{
 		cloudProviders:       map[types.CloudProviderName]*CloudServicesProvider{},
 		log:                  log,
 		periodicPullInterval: config.PullInterval,
 	}
 
+	if httpClientMock == nil {
+		manager.client = &http.Client{
+			Timeout: time.Second * 10,
+		}
+	} else {
+		manager.client = httpClientMock
+	}
+
 	// return an empty manager if all providers are disabled
 	if config.DisableAll {
 		log.Info("all cloud providers are disabled")
@@ -91,7 +132,7 @@
 }
 
 // StartPeriodicPull starts a goroutine that pulls, at the configured interval, the ip ranges of all enabled cloud providers.
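The injectable client added to New above is what the reworked tests in this patch rely on. A minimal sketch of a caller inside the cloudservice package (the helper name and the 5-second timeout are hypothetical, not part of this patch):

	// buildManager wires a caller-supplied HTTP client into the manager.
	func buildManager(logger *zap.SugaredLogger, configs types.CloudProviderConfigs) (CloudServicesProvidersManager, error) {
		// any *http.Client works here; the tests pass one whose Transport is a RoundTripper mock
		httpClient := &http.Client{Timeout: 5 * time.Second}

		return New(logger, configs, httpClient)
	}

Passing nil instead keeps the fallback path shown above: New builds its own client with a 10-second timeout.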
-func (s *CloudServicesProvidersManager) StartPeriodicPull() { +func (s *cloudServicesProvidersManager) StartPeriodicPull() { s.log.Infow("starting periodic pull and parsing of the cloud provider ip ranges", "interval", s.periodicPullInterval.String()) go func() { @@ -111,14 +152,14 @@ func (s *CloudServicesProvidersManager) StartPeriodicPull() { } // StopPeriodicPull stop the goroutine pulling all ip ranges of all cloud providers -func (s *CloudServicesProvidersManager) StopPeriodicPull() { +func (s *cloudServicesProvidersManager) StopPeriodicPull() { s.log.Infow("closing periodic pull and parsing of the cloud provider ip ranges") s.stopPeriodicPull <- true } // PullIPRanges pull all ip ranges of all cloud providers -func (s *CloudServicesProvidersManager) PullIPRanges() error { +func (s *cloudServicesProvidersManager) PullIPRanges() error { errorMessage := "" s.log.Infow("pull and parse of the cloud provider ip ranges") @@ -139,7 +180,7 @@ func (s *CloudServicesProvidersManager) PullIPRanges() error { } // GetServicesIPRanges with a given list of service names and cloud provider name, returns the list of ip ranges of those services -func (s *CloudServicesProvidersManager) GetServicesIPRanges(cloudProviderName types.CloudProviderName, serviceNames []string) (map[string][]string, error) { +func (s *cloudServicesProvidersManager) GetServicesIPRanges(cloudProviderName types.CloudProviderName, serviceNames []string) (map[string][]string, error) { if s.cloudProviders[cloudProviderName] == nil { return nil, fmt.Errorf("cloud provider %s is not configured or does not exist", cloudProviderName) } @@ -168,7 +209,7 @@ func (s *CloudServicesProvidersManager) GetServicesIPRanges(cloudProviderName ty } // GetServiceList return the list of services of a specific cloud provider. Mostly used in disruption creation validation -func (s *CloudServicesProvidersManager) GetServiceList(cloudProviderName types.CloudProviderName) []string { +func (s *cloudServicesProvidersManager) GetServiceList(cloudProviderName types.CloudProviderName) []string { if s.cloudProviders[cloudProviderName] == nil || s.cloudProviders[cloudProviderName].IPRangeInfo == nil { return nil } @@ -176,8 +217,13 @@ func (s *CloudServicesProvidersManager) GetServiceList(cloudProviderName types.C return s.cloudProviders[cloudProviderName].IPRangeInfo.ServiceList } +// GetProviderByName retrieves a CloudServicesProvider instance by its name from the manager's collection of cloud providers. 
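+//
+// A hypothetical usage sketch (not part of the patch; provider constants as
+// defined in the types package, and nil is returned for providers that are
+// disabled or unknown):
+//
+//	if provider := manager.GetProviderByName(types.CloudProviderAWS); provider != nil {
+//		fmt.Println(provider.Conf.IPRangesURL)
+//	}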
+func (s *cloudServicesProvidersManager) GetProviderByName(name types.CloudProviderName) *CloudServicesProvider { + return s.cloudProviders[name] +} + // pullIPRangesPerCloudProvider pull ip ranges of one cloud provider -func (s *CloudServicesProvidersManager) pullIPRangesPerCloudProvider(cloudProviderName types.CloudProviderName) error { +func (s *cloudServicesProvidersManager) pullIPRangesPerCloudProvider(cloudProviderName types.CloudProviderName) error { provider := s.cloudProviders[cloudProviderName] if provider == nil { return fmt.Errorf("cloud provider %s does not exist", cloudProviderName) @@ -210,12 +256,8 @@ func (s *CloudServicesProvidersManager) pullIPRangesPerCloudProvider(cloudProvid } // requestIPRangesFromProvider launches a HTTP GET request to pull the ip range json file from a url -func (s *CloudServicesProvidersManager) requestIPRangesFromProvider(url string) ([]byte, error) { - client := http.Client{ - Timeout: time.Second * 10, - } - - response, err := client.Get(url) +func (s *cloudServicesProvidersManager) requestIPRangesFromProvider(url string) ([]byte, error) { + response, err := s.client.Get(url) if err != nil { return nil, err } diff --git a/cloudservice/manager_test.go b/cloudservice/manager_test.go index 160b7043d..ccccb5087 100644 --- a/cloudservice/manager_test.go +++ b/cloudservice/manager_test.go @@ -6,6 +6,10 @@ package cloudservice import ( + "bytes" + "errors" + "io" + "net/http" "reflect" "testing" "time" @@ -13,6 +17,7 @@ import ( "github.com/DataDog/chaos-controller/cloudservice/gcp" "github.com/DataDog/chaos-controller/cloudservice/types" "github.com/DataDog/chaos-controller/log" + "github.com/DataDog/chaos-controller/mocks" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/stretchr/testify/mock" @@ -20,12 +25,22 @@ import ( func TestManager(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Cloudservice Manager Suite") + RunSpecs(t, "CloudService Manager Suite") } +const ( + AWSURL = "https://ip-ranges.amazonaws.com/ip-ranges.json" + GCPURL = "https://www.gstatic.com/ipranges/goog.json" + DatadogURL = "https://ip-ranges.datadoghq.com/" +) + var _ = Describe("New function", func() { - var manager *CloudServicesProvidersManager - var configs types.CloudProviderConfigs + + var ( + configs types.CloudProviderConfigs + manager CloudServicesProvidersManager + httpRoundTripperMock *mocks.RoundTripperMock + ) BeforeEach(func() { configs = types.CloudProviderConfigs{ @@ -33,45 +48,110 @@ var _ = Describe("New function", func() { PullInterval: time.Minute, AWS: types.CloudProviderConfig{ Enabled: true, - IPRangesURL: "https://ip-ranges.amazonaws.com/ip-ranges.json", + IPRangesURL: AWSURL, }, GCP: types.CloudProviderConfig{ Enabled: true, - IPRangesURL: "https://www.gstatic.com/ipranges/goog.json", + IPRangesURL: GCPURL, }, Datadog: types.CloudProviderConfig{ Enabled: true, - IPRangesURL: "https://ip-ranges.datadoghq.com/", + IPRangesURL: DatadogURL, }, } + httpRoundTripperMock = mocks.NewRoundTripperMock(GinkgoT()) + httpRoundTripperMock.EXPECT().RoundTrip(mock.Anything).RunAndReturn(func(request *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewReader([]byte(`{}`))), + }, nil + }).Maybe() }) JustBeforeEach(func() { var err error logger, _ := log.NewZapLogger() - manager, err = New(logger, configs) + httpClient := http.Client{ + Transport: httpRoundTripperMock, + } + manager, err = New(logger, configs, &httpClient) By("Ensuring that no error was thrown") 
Expect(err).ToNot(HaveOccurred()) }) Context("Creating a new manager with all providers enabled", func() { + BeforeEach(func() { + // Arrange + httpRoundTripperMock = mocks.NewRoundTripperMock(GinkgoT()) + httpRoundTripperMock.EXPECT().RoundTrip(mock.Anything).RunAndReturn(func(request *http.Request) (*http.Response, error) { + var body []byte + switch request.URL.String() { + case AWSURL: + body = []byte(`{ + "syncToken": "1693194189", + "createDate": "2023-08-28-03-43-09", + "prefixes": [ + { + "ip_prefix": "3.2.34.0/26", + "region": "af-south-1", + "service": "ROUTE53_RESOLVER", + "network_border_group": "af-south-1" + } + ] +}`) + case GCPURL: + body = []byte(`{ + "syncToken": "1693209970630", + "creationTime": "2023-08-28T01:06:10.63098", + "prefixes": [{ + "ipv4Prefix": "8.8.4.0/24" + }] +}`) + case DatadogURL: + body = []byte(`{ + "version": 54, + "modified": "2023-07-14-00-00-00", + "agents": { + "prefixes_ipv4": [ + "3.233.144.0/20" + ], + "prefixes_ipv6": [ + "2600:1f18:24e6:b900::/56" + ] + } +}`) + default: + return nil, errors.New("unknown URL") + } + + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewReader(body)), + }, nil + }).Maybe() + }) + It("should have parsed once", func() { + awsProvider := manager.GetProviderByName(types.CloudProviderAWS) + GCPProvider := manager.GetProviderByName(types.CloudProviderGCP) + DatadogProvider := manager.GetProviderByName(types.CloudProviderDatadog) + By("Ensuring that we have all cloud managed services") - Expect(manager.cloudProviders[types.CloudProviderAWS]).ToNot(BeNil()) - Expect(manager.cloudProviders[types.CloudProviderGCP]).ToNot(BeNil()) - Expect(manager.cloudProviders[types.CloudProviderDatadog]).ToNot(BeNil()) + Expect(awsProvider).ToNot(BeNil()) + Expect(GCPProvider).ToNot(BeNil()) + Expect(DatadogProvider).ToNot(BeNil()) By("Ensuring that the ips are parsed") - Expect(manager.cloudProviders[types.CloudProviderAWS].IPRangeInfo.IPRanges).ToNot(BeEmpty()) - Expect(manager.cloudProviders[types.CloudProviderGCP].IPRangeInfo.IPRanges).ToNot(BeEmpty()) - Expect(manager.cloudProviders[types.CloudProviderDatadog].IPRangeInfo.IPRanges).ToNot(BeEmpty()) + Expect(awsProvider.IPRangeInfo.IPRanges).ToNot(BeEmpty()) + Expect(GCPProvider.IPRangeInfo.IPRanges).ToNot(BeEmpty()) + Expect(DatadogProvider.IPRangeInfo.IPRanges).ToNot(BeEmpty()) By("Ensuring that we have a service list for every cloud provider") - Expect(manager.cloudProviders[types.CloudProviderAWS].IPRangeInfo.ServiceList).ToNot(BeEmpty()) - Expect(manager.cloudProviders[types.CloudProviderGCP].IPRangeInfo.ServiceList).ToNot(BeEmpty()) - Expect(manager.cloudProviders[types.CloudProviderDatadog].IPRangeInfo.ServiceList).ToNot(BeEmpty()) + Expect(awsProvider.IPRangeInfo.ServiceList).ToNot(BeEmpty()) + Expect(GCPProvider.IPRangeInfo.ServiceList).ToNot(BeEmpty()) + Expect(DatadogProvider.IPRangeInfo.ServiceList).ToNot(BeEmpty()) }) }) @@ -82,9 +162,9 @@ var _ = Describe("New function", func() { It("should have parsed once", func() { By("Ensuring that we have all cloud managed services") - Expect(manager.cloudProviders[types.CloudProviderAWS]).To(BeNil()) - Expect(manager.cloudProviders[types.CloudProviderGCP]).ToNot(BeNil()) - Expect(manager.cloudProviders[types.CloudProviderDatadog]).ToNot(BeNil()) + Expect(manager.GetProviderByName(types.CloudProviderAWS)).To(BeNil()) + Expect(manager.GetProviderByName(types.CloudProviderGCP)).ToNot(BeNil()) + Expect(manager.GetProviderByName(types.CloudProviderDatadog)).ToNot(BeNil()) }) }) @@ -95,60 +175,103 @@ 
var _ = Describe("New function", func() { It("should have parsed once", func() { By("Ensuring that we have all cloud managed services") - Expect(manager.cloudProviders[types.CloudProviderAWS]).To(BeNil()) - Expect(manager.cloudProviders[types.CloudProviderGCP]).To(BeNil()) - Expect(manager.cloudProviders[types.CloudProviderDatadog]).To(BeNil()) + Expect(manager.GetProviderByName(types.CloudProviderAWS)).To(BeNil()) + Expect(manager.GetProviderByName(types.CloudProviderGCP)).To(BeNil()) + Expect(manager.GetProviderByName(types.CloudProviderDatadog)).To(BeNil()) }) }) Context("Pull new ip ranges from aws and gcp", func() { - JustBeforeEach(func() { - manager.cloudProviders = map[types.CloudProviderName]*CloudServicesProvider{ - types.CloudProviderAWS: { - CloudProviderIPRangeManager: NewCloudServiceMock( - true, - nil, - "1", - []string{"S3", "EC2"}, - map[string][]string{ - "S3": { - "1.2.3.0/24", - "2.2.3.0/24", - }, - "EC2": { - "4.2.3.0/24", - "5.2.3.0/24", - }, - }, - nil, - ), - Conf: types.CloudProviderConfig{ - Enabled: true, - IPRangesURL: "https://ip-ranges.amazonaws.com/ip-ranges.json", - }, + BeforeEach(func() { + // Arrange + httpRoundTripperMock = mocks.NewRoundTripperMock(GinkgoT()) + httpRoundTripperMock.EXPECT().RoundTrip(mock.Anything).RunAndReturn(func(request *http.Request) (*http.Response, error) { + var body []byte + switch request.URL.String() { + case AWSURL: + body = []byte(`{ + "syncToken": "1693194189", + "createDate": "2023-08-28-03-43-09", + "prefixes": [ + { + "ip_prefix": "1.2.3.0/24", + "region": "af-south-1", + "service": "S3", + "network_border_group": "af-south-1" + }, + { + "ip_prefix": "2.2.3.0/24", + "region": "af-south-1", + "service": "S3", + "network_border_group": "af-south-1" + }, + { + "ip_prefix": "4.2.3.0/24", + "region": "af-south-1", + "service": "EC2", + "network_border_group": "af-south-1" + }, + { + "ip_prefix": "5.2.3.0/24", + "region": "af-south-1", + "service": "EC2", + "network_border_group": "af-south-1" + } + ] +}`) + case GCPURL: + body = []byte(`{ + "syncToken": "1693209970630", + "creationTime": "2023-08-28T01:06:10.63098", + "prefixes": [{ + "ipv4Prefix": "6.2.3.0/24" + }, + { + "ipv4Prefix": "7.2.3.0/24" + }, + { + "ipv4Prefix": "8.2.3.0/24" + }] +}`) + case DatadogURL: + body = []byte(`{ + "version": 54, + "modified": "2023-07-14-00-00-00", + "agents": { + "prefixes_ipv4": [ + "3.233.144.0/20" + ], + "prefixes_ipv6": [ + "2600:1f18:24e6:b900::/56" + ] + } +}`) + default: + return nil, errors.New("unknown URL") + } + return &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(bytes.NewReader(body)), + }, nil + }).Maybe() + + configs = types.CloudProviderConfigs{ + DisableAll: false, + PullInterval: time.Minute, + AWS: types.CloudProviderConfig{ + Enabled: true, + IPRangesURL: "https://ip-ranges.amazonaws.com/ip-ranges.json", }, - types.CloudProviderGCP: { - CloudProviderIPRangeManager: NewCloudServiceMock( - true, - nil, - "1", - []string{gcp.GoogleCloudService}, - map[string][]string{ - gcp.GoogleCloudService: { - "6.2.3.0/24", - "7.2.3.0/24", - "8.2.3.0/24", - }, - }, - nil, - ), - Conf: types.CloudProviderConfig{ - Enabled: true, - IPRangesURL: "https://www.gstatic.com/ipranges/goog.json", // General IP Ranges from Google, contains some API ip ranges - }, + GCP: types.CloudProviderConfig{ + Enabled: true, + IPRangesURL: "https://www.gstatic.com/ipranges/goog.json", }, } + }) + + JustBeforeEach(func() { + // Action err := manager.PullIPRanges() By("Ensuring that no error was thrown") @@ -156,12 +279,15 @@ var _ = 
Describe("New function", func() { }) It("should have parsed successfully the service list", func() { + awsProvider := manager.GetProviderByName(types.CloudProviderAWS) + GCPProvider := manager.GetProviderByName(types.CloudProviderGCP) + By("Ensuring that we have a service list for every cloud provider") - Expect(manager.cloudProviders[types.CloudProviderAWS].IPRangeInfo.ServiceList).ToNot(BeEmpty()) - Expect(manager.cloudProviders[types.CloudProviderGCP].IPRangeInfo.ServiceList).ToNot(BeEmpty()) + Expect(awsProvider.IPRangeInfo.ServiceList).ToNot(BeEmpty()) + Expect(GCPProvider.IPRangeInfo.ServiceList).ToNot(BeEmpty()) By("Ensuring aws service list is populated with the right information") - Expect(reflect.DeepEqual(manager.cloudProviders[types.CloudProviderAWS].IPRangeInfo.ServiceList, []string{ + Expect(reflect.DeepEqual(awsProvider.IPRangeInfo.ServiceList, []string{ "S3", "EC2", })).To(BeTrue()) @@ -171,7 +297,7 @@ var _ = Describe("New function", func() { })).To(BeTrue()) By("Ensuring gcp service list is populated with the right information") - Expect(reflect.DeepEqual(manager.cloudProviders[types.CloudProviderGCP].IPRangeInfo.ServiceList, []string{ + Expect(reflect.DeepEqual(GCPProvider.IPRangeInfo.ServiceList, []string{ gcp.GoogleCloudService, })).To(BeTrue()) Expect(reflect.DeepEqual(manager.GetServiceList(types.CloudProviderGCP), []string{ @@ -180,12 +306,15 @@ var _ = Describe("New function", func() { }) It("should have parsed successfully the ip ranges map", func() { + awsProvider := manager.GetProviderByName(types.CloudProviderAWS) + GCPProvider := manager.GetProviderByName(types.CloudProviderGCP) + By("Ensuring that we have an ip ranges map for every cloud provider") - Expect(manager.cloudProviders[types.CloudProviderAWS].IPRangeInfo.IPRanges).ToNot(BeEmpty()) - Expect(manager.cloudProviders[types.CloudProviderGCP].IPRangeInfo.IPRanges).ToNot(BeEmpty()) + Expect(awsProvider.IPRangeInfo.IPRanges).ToNot(BeEmpty()) + Expect(GCPProvider.IPRangeInfo.IPRanges).ToNot(BeEmpty()) By("Ensuring aws ip ranges map is populated with the right information") - Expect(reflect.DeepEqual(manager.cloudProviders[types.CloudProviderAWS].IPRangeInfo.IPRanges, map[string][]string{ + Expect(reflect.DeepEqual(awsProvider.IPRangeInfo.IPRanges, map[string][]string{ "S3": { "1.2.3.0/24", "2.2.3.0/24", @@ -211,7 +340,7 @@ var _ = Describe("New function", func() { })).To(BeTrue()) By("Ensuring gcp ip ranges map is populated with the right information") - Expect(reflect.DeepEqual(manager.cloudProviders[types.CloudProviderGCP].IPRangeInfo.IPRanges, map[string][]string{ + Expect(reflect.DeepEqual(GCPProvider.IPRangeInfo.IPRanges, map[string][]string{ gcp.GoogleCloudService: { "6.2.3.0/24", "7.2.3.0/24", @@ -232,18 +361,3 @@ var _ = Describe("New function", func() { }) }) }) - -func NewCloudServiceMock(isNewVersionMockValue bool, isNewVersionError error, convertToGenericIPRangesVersion string, convertToGenericIPRangesServiceList []string, convertToGenericIPRanges map[string][]string, convertToGenericIPRangesError error) *CloudProviderIPRangeManagerMock { - cloudProviderIPRangeMock := NewCloudProviderIPRangeManagerMock(GinkgoT()) - - cloudProviderIPRangeMock.EXPECT().ConvertToGenericIPRanges(mock.Anything).Return( - &types.CloudProviderIPRangeInfo{ - Version: convertToGenericIPRangesVersion, - IPRanges: convertToGenericIPRanges, - ServiceList: convertToGenericIPRangesServiceList, - }, - convertToGenericIPRangesError, - ) - - return cloudProviderIPRangeMock -} diff --git a/controllers/disruption_controller.go 
b/controllers/disruption_controller.go index 2cdcea8ab..8e411de5e 100644 --- a/controllers/disruption_controller.go +++ b/controllers/disruption_controller.go @@ -24,24 +24,21 @@ import ( "strings" "time" - chaosapi "github.com/DataDog/chaos-controller/api" - "github.com/DataDog/chaos-controller/cloudservice" + chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" "github.com/DataDog/chaos-controller/o11y/metrics" "github.com/DataDog/chaos-controller/o11y/tracer" "github.com/DataDog/chaos-controller/safemode" + "github.com/DataDog/chaos-controller/services" "github.com/DataDog/chaos-controller/targetselector" chaostypes "github.com/DataDog/chaos-controller/types" "github.com/DataDog/chaos-controller/watchers" - "github.com/cenkalti/backoff" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" kubeinformers "k8s.io/client-go/informers" @@ -56,38 +53,23 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" - - chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" - "github.com/DataDog/chaos-controller/env" ) // DisruptionReconciler reconciles a Disruption object type DisruptionReconciler struct { - Client client.Client - BaseLog *zap.SugaredLogger - Scheme *runtime.Scheme - Recorder record.EventRecorder - MetricsSink metrics.Sink - TracerSink tracer.Sink - TargetSelector targetselector.TargetSelector - InjectorAnnotations map[string]string - InjectorLabels map[string]string - InjectorServiceAccount string - InjectorImage string - ImagePullSecrets string - log *zap.SugaredLogger - ChaosNamespace string - InjectorDNSDisruptionDNSServer string - InjectorDNSDisruptionKubeDNS string - InjectorNetworkDisruptionAllowedHosts []string - SafetyNets []safemode.Safemode - ExpiredDisruptionGCDelay *time.Duration - CacheContextStore map[string]CtxTuple - Controller controller.Controller - Reader client.Reader // Use the k8s API without the cache - EnableObserver bool // Enable Observer on targets update with dynamic targeting - CloudServicesProvidersManager *cloudservice.CloudServicesProvidersManager - DisruptionsWatchersManager watchers.DisruptionsWatchersManager + Client client.Client + BaseLog *zap.SugaredLogger + Scheme *runtime.Scheme + Recorder record.EventRecorder + MetricsSink metrics.Sink + TracerSink tracer.Sink + TargetSelector targetselector.TargetSelector + log *zap.SugaredLogger + SafetyNets []safemode.Safemode + ExpiredDisruptionGCDelay *time.Duration + CacheContextStore map[string]CtxTuple + DisruptionsWatchersManager watchers.DisruptionsWatchersManager + ChaosPodService services.ChaosPodService } type CtxTuple struct { @@ -128,18 +110,18 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) return } - if isModifiedError(err) { + if chaosv1beta1.IsUpdateConflictError(err) { r.log.Infow("a retryable error occurred in reconcile loop", "error", err) } else { r.log.Errorw("an error occurred in reconcile loop", "error", err) } }() - if err := r.Client.Get(context.Background(), req.NamespacedName, instance); err != nil { + if err := r.Client.Get(ctx, req.NamespacedName, instance); err != nil { if client.IgnoreNotFound(err) == nil { // 
If we're reconciling but without an instance, then we must have been triggered by the pod informer // We should check for and delete any orphaned chaos pods - err = r.handleOrphanedChaosPods(req) + err = r.ChaosPodService.HandleOrphanedChaosPods(ctx, req) } return ctrl.Result{}, client.IgnoreNotFound(err) @@ -177,7 +159,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) r.log = r.log.With(r.TracerSink.GetLoggableTraceContext(reconcileSpan)...) // handle any chaos pods being deleted (either by the disruption deletion or by an external event) - if err := r.handleChaosPodsTermination(instance); err != nil { + if err := r.handleChaosPodsTermination(ctx, instance); err != nil { return ctrl.Result{}, fmt.Errorf("error handling chaos pods termination: %w", err) } @@ -185,7 +167,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) if !instance.DeletionTimestamp.IsZero() { // the instance is being deleted, clean it if the finalizer is still present if controllerutil.ContainsFinalizer(instance, chaostypes.DisruptionFinalizer) { - isCleaned, err := r.cleanDisruption(instance) + isCleaned, err := r.cleanDisruption(ctx, instance) if err != nil { return ctrl.Result{}, fmt.Errorf("error cleaning disruption: %w", err) } @@ -200,7 +182,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{ Requeue: true, RequeueAfter: requeueAfter, - }, r.Client.Update(context.Background(), instance) + }, r.Client.Update(ctx, instance) } // we reach this code when all the cleanup pods have succeeded @@ -211,7 +193,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) r.DisruptionsWatchersManager.RemoveAllWatchers(instance) controllerutil.RemoveFinalizer(instance, chaostypes.DisruptionFinalizer) - if err := r.Client.Update(context.Background(), instance); err != nil { + if err := r.Client.Update(ctx, instance); err != nil { return ctrl.Result{}, fmt.Errorf("error removing disruption finalizer: %w", err) } @@ -253,26 +235,26 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) // the injection is being created or modified, apply needed actions controllerutil.AddFinalizer(instance, chaostypes.DisruptionFinalizer) - if err := r.Client.Update(context.Background(), instance); err != nil { + if err := r.Client.Update(ctx, instance); err != nil { return ctrl.Result{}, fmt.Errorf("error adding disruption finalizer: %w", err) } // If the disruption is at least r.ExpiredDisruptionGCDelay older than when its duration ended, then we should delete it. // calculateRemainingDurationSeconds returns the seconds until (or since, if negative) the duration's deadline. 
We compare it to negative ExpiredDisruptionGCDelay,
	// and if less than that, it means we have exceeded the deadline by at least ExpiredDisruptionGCDelay, so we can delete
-	if r.ExpiredDisruptionGCDelay != nil && (calculateRemainingDuration(*instance) <= (-1 * *r.ExpiredDisruptionGCDelay)) {
+	if r.ExpiredDisruptionGCDelay != nil && (instance.RemainingDuration() <= (-1 * *r.ExpiredDisruptionGCDelay)) {
 		r.log.Infow("disruption has lived for more than its duration, it will now be deleted.", "duration", instance.Spec.Duration)
 		r.recordEventOnDisruption(instance, chaosv1beta1.EventDisruptionGCOver, r.ExpiredDisruptionGCDelay.String(), "")
 
 		var err error
 
-		if err = r.Client.Delete(context.Background(), instance); err != nil {
+		if err = r.Client.Delete(ctx, instance); err != nil {
 			r.log.Errorw("error deleting disruption after its duration expired", "error", err)
 		}
 
 		return ctrl.Result{Requeue: true}, err
-	} else if calculateRemainingDuration(*instance) <= 0 {
-		if err := r.updateInjectionStatus(instance); err != nil {
+	} else if instance.RemainingDuration() <= 0 {
+		if err := r.updateInjectionStatus(ctx, instance); err != nil {
 			return ctrl.Result{}, fmt.Errorf("error updating disruption injection status: %w", err)
 		}
 
@@ -292,7 +274,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	// check if we have reached trigger.createPods. If not, skip the rest of reconciliation.
-	requeueAfter := time.Until(TimeToCreatePods(instance.Spec.Triggers, instance.CreationTimestamp.Time))
+	requeueAfter := time.Until(instance.TimeToCreatePods())
 	if requeueAfter > (time.Second * 5) {
 		requeueAfter -= (time.Second * 5)
 		r.log.Debugw("requeuing disruption as we haven't yet reached trigger.createPods", "requeueAfter", requeueAfter.String())
@@ -301,12 +283,12 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	}
 
 	// retrieve targets from label selector
-	if err := r.selectTargets(instance); err != nil {
+	if err := r.selectTargets(ctx, instance); err != nil {
 		return ctrl.Result{}, fmt.Errorf("error selecting targets: %w", err)
 	}
 
 	// start injections
-	if err := r.startInjection(instance); err != nil {
+	if err := r.startInjection(ctx, instance); err != nil {
 		return ctrl.Result{}, fmt.Errorf("error creating chaos pods to start the disruption: %w", err)
 	}
 
@@ -315,7 +297,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
 	// update resource status injection
 	// requeue the request if the disruption is not fully injected yet
-	err := r.updateInjectionStatus(instance)
+	err := r.updateInjectionStatus(ctx, instance)
 	if err != nil {
 		return ctrl.Result{}, fmt.Errorf("error updating disruption injection status: %w", err)
 	} else if instance.Status.InjectionStatus.NotFullyInjected() {
@@ -329,7 +311,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		}, nil
 	}
 
-	disruptionEndAt := calculateRemainingDuration(*instance) + time.Second
+	disruptionEndAt := instance.RemainingDuration() + time.Second
 
 	r.log.Infow("requeuing disruption to check once expired", "requeueDelay", disruptionEndAt)
 
@@ -337,7 +319,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		Requeue:      true,
 		RequeueAfter: disruptionEndAt,
 	},
-		r.Client.Update(context.Background(), instance)
+		r.Client.Update(ctx, instance)
 }
 
 // stop the reconcile loop, there's nothing else to do
@@ -349,7 +331,7 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 // - an instance with at least
one chaos pod as "ready" is considered as "partially injected" // - an instance with no ready chaos pods is considered as "not injected" // - an instance expired will have previously defined status prefixed with "previously" -func (r *DisruptionReconciler) updateInjectionStatus(instance *chaosv1beta1.Disruption) (err error) { +func (r *DisruptionReconciler) updateInjectionStatus(ctx context.Context, instance *chaosv1beta1.Disruption) (err error) { r.log.Debugw("checking if injection status needs to be updated", "injectionStatus", instance.Status.InjectionStatus) defer func() { @@ -359,7 +341,7 @@ func (r *DisruptionReconciler) updateInjectionStatus(instance *chaosv1beta1.Disr readyPodsCount := 0 // get chaos pods - chaosPods, err := r.getChaosPods(instance, nil) + chaosPods, err := r.ChaosPodService.GetChaosPodsOfDisruption(ctx, instance, nil) if err != nil { return fmt.Errorf("error getting instance chaos pods: %w", err) } @@ -369,15 +351,15 @@ func (r *DisruptionReconciler) updateInjectionStatus(instance *chaosv1beta1.Disr status = chaostypes.DisruptionInjectionStatusNotInjected } - terminationStatus := disruptionTerminationStatus(*instance, chaosPods) - if terminationStatus != tsNotTerminated { + terminationStatus := instance.TerminationStatus(chaosPods) + if terminationStatus != chaosv1beta1.TSNotTerminated { switch status { case chaostypes.DisruptionInjectionStatusInjected, chaostypes.DisruptionInjectionStatusPausedInjected, chaostypes.DisruptionInjectionStatusPreviouslyInjected: status = chaostypes.DisruptionInjectionStatusPausedInjected - if terminationStatus == tsDefinitivelyTerminated { + if terminationStatus == chaosv1beta1.TSDefinitivelyTerminated { status = chaostypes.DisruptionInjectionStatusPreviouslyInjected } case @@ -385,7 +367,7 @@ func (r *DisruptionReconciler) updateInjectionStatus(instance *chaosv1beta1.Disr chaostypes.DisruptionInjectionStatusPausedPartiallyInjected, chaostypes.DisruptionInjectionStatusPreviouslyPartiallyInjected: status = chaostypes.DisruptionInjectionStatusPausedPartiallyInjected - if terminationStatus == tsDefinitivelyTerminated { + if terminationStatus == chaosv1beta1.TSDefinitivelyTerminated { status = chaostypes.DisruptionInjectionStatusPreviouslyPartiallyInjected } case @@ -393,7 +375,7 @@ func (r *DisruptionReconciler) updateInjectionStatus(instance *chaosv1beta1.Disr chaostypes.DisruptionInjectionStatusPreviouslyNotInjected: // NB: we can't be PausedNotInjected, it's NotInjected status = chaostypes.DisruptionInjectionStatusNotInjected - if terminationStatus == tsDefinitivelyTerminated { + if terminationStatus == chaosv1beta1.TSDefinitivelyTerminated { status = chaostypes.DisruptionInjectionStatusPreviouslyNotInjected } default: @@ -451,7 +433,7 @@ func (r *DisruptionReconciler) updateInjectionStatus(instance *chaosv1beta1.Disr instance.Status.InjectedTargetsCount = int(math.Floor(float64(readyPodsCount) / float64(instance.Spec.DisruptionCount()))) } - if err := r.Client.Status().Update(context.Background(), instance); err != nil { + if err := r.Client.Status().Update(ctx, instance); err != nil { return fmt.Errorf("unable to update disruption injection status: %w", err) } @@ -459,11 +441,11 @@ func (r *DisruptionReconciler) updateInjectionStatus(instance *chaosv1beta1.Disr } // startInjection creates non-existing chaos pod for the given disruption -func (r *DisruptionReconciler) startInjection(instance *chaosv1beta1.Disruption) error { +func (r *DisruptionReconciler) startInjection(ctx context.Context, instance *chaosv1beta1.Disruption) error 
{ // chaosPodsMap is used to check if a target's chaos pods already exist or not chaosPodsMap := make(map[string]map[string]bool, len(instance.Status.TargetInjections)) - chaosPods, err := r.getChaosPods(instance, nil) + chaosPods, err := r.ChaosPodService.GetChaosPodsOfDisruption(ctx, instance, nil) if err != nil { return fmt.Errorf("error getting chaos pods: %w", err) } @@ -475,7 +457,7 @@ func (r *DisruptionReconciler) startInjection(instance *chaosv1beta1.Disruption) for _, chaosPod := range chaosPods { if !instance.Status.HasTarget(chaosPod.Labels[chaostypes.TargetLabel]) { - r.deleteChaosPod(instance, chaosPod) + r.deleteChaosPod(ctx, instance, chaosPod) } else { chaosPodsMap[chaosPod.Labels[chaostypes.TargetLabel]][chaosPod.Labels[chaostypes.DisruptionKindLabel]] = true } @@ -501,7 +483,7 @@ func (r *DisruptionReconciler) startInjection(instance *chaosv1beta1.Disruption) continue } - if err = r.createChaosPods(instance, targetName); err != nil { + if err = r.createChaosPods(ctx, instance, targetName); err != nil { if !apierrors.IsNotFound(err) { return fmt.Errorf("error creating chaos pods: %w", err) } @@ -517,7 +499,7 @@ func (r *DisruptionReconciler) startInjection(instance *chaosv1beta1.Disruption) } // createChaosPods attempts to create all the chaos pods for a given target. If a given chaos pod already exists, it is not recreated. -func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption, target string) error { +func (r *DisruptionReconciler) createChaosPods(ctx context.Context, instance *chaosv1beta1.Disruption, target string) error { var err error targetNodeName := "" @@ -529,7 +511,7 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption case chaostypes.DisruptionLevelPod: pod := corev1.Pod{} - if err := r.Client.Get(context.Background(), types.NamespacedName{Namespace: instance.Namespace, Name: target}, &pod); err != nil { + if err := r.Client.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: target}, &pod); err != nil { return fmt.Errorf("error getting target to inject: %w", err) } @@ -552,7 +534,7 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption } // generate injection pods specs - targetChaosPods, err := r.generateChaosPods(instance, target, targetNodeName, targetContainers, targetPodIP) + targetChaosPods, err := r.ChaosPodService.GenerateChaosPodsOfDisruption(instance, target, targetNodeName, targetContainers, targetPodIP) if err != nil { return fmt.Errorf("error generating chaos pods: %w", err) } @@ -563,8 +545,8 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption return nil } - if calculateRemainingDuration(*instance).Seconds() < 1 { - r.log.Debugw("skipping creation of chaos pods, remaining duration is too small", "remainingDuration", calculateRemainingDuration(*instance).String()) + if instance.RemainingDuration().Seconds() < 1 { + r.log.Debugw("skipping creation of chaos pods, remaining duration is too small", "remainingDuration", instance.RemainingDuration().String()) return nil } @@ -572,7 +554,7 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption // create injection pods for _, targetChaosPod := range targetChaosPods { // check if an injection pod already exists for the given (instance, namespace, disruption kind) tuple - found, err := r.getChaosPods(instance, targetChaosPod.Labels) + found, err := r.ChaosPodService.GetChaosPodsOfDisruption(ctx, instance, targetChaosPod.Labels) if err != nil { 
return fmt.Errorf("error getting existing chaos pods: %w", err) } @@ -580,11 +562,11 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption // create injection pods if none have been found switch len(found) { case 0: - chaosPodArgs := r.getChaosPodInjectorArgs(targetChaosPod) + chaosPodArgs := r.ChaosPodService.GetPodInjectorArgs(targetChaosPod) r.log.Infow("creating chaos pod", "target", target, "chaosPodArgs", chaosPodArgs) // create the pod - if err = r.Client.Create(context.Background(), &targetChaosPod); err != nil { + if err = r.ChaosPodService.CreatePod(ctx, &targetChaosPod); err != nil { r.recordEventOnDisruption(instance, chaosv1beta1.EventDisruptionCreationFailed, instance.Name, target) r.handleMetricSinkError(r.MetricsSink.MetricPodsCreated(target, instance.Name, instance.Namespace, false)) @@ -592,7 +574,7 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption } // wait for the pod to be existing - if err := r.waitForPodCreation(&targetChaosPod); err != nil { + if err := r.ChaosPodService.WaitForPodCreation(ctx, targetChaosPod); err != nil { r.log.Errorw("error waiting for chaos pod to be created", "error", err, "chaosPod", targetChaosPod.Name, "target", target) continue @@ -600,7 +582,7 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption // send metrics and events r.recordEventOnDisruption(instance, chaosv1beta1.EventDisruptionChaosPodCreated, instance.Name, target) - r.recordEventOnTarget(instance, target, chaosv1beta1.EventDisrupted, targetChaosPod.Name, instance.Name) + r.recordEventOnTarget(ctx, instance, target, chaosv1beta1.EventDisrupted, targetChaosPod.Name, instance.Name) r.handleMetricSinkError(r.MetricsSink.MetricPodsCreated(target, instance.Name, instance.Namespace, true)) case 1: r.log.Debugw("an injection pod is already existing for the selected target", "target", target, "chaosPod", found[0].Name) @@ -617,53 +599,15 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption return nil } -func (r *DisruptionReconciler) getChaosPodInjectorArgs(chaosPod corev1.Pod) []string { - chaosPodArgs := []string{} - - if len(chaosPod.Spec.Containers) > 0 { - for _, container := range chaosPod.Spec.Containers { - if container.Name == "injector" { - chaosPodArgs = container.Args - } - } - - if len(chaosPodArgs) == 0 { - r.log.Warnw("unable to find the args for this chaos pod", "chaosPodName", chaosPod.Name, "chaosPodSpec", chaosPod.Spec, "chaosPodContainerCount", len(chaosPod.Spec.Containers)) - } - } else { - r.log.Errorw("no containers found in chaos pod spec", "chaosPodSpec", chaosPod.Spec) - } - - return chaosPodArgs -} - -// waitForPodCreation waits for the given pod to be created -// it tries to get the pod using an exponential backoff with a max retry interval of 1 second and a max duration of 30 seconds -// if an unexpected error occurs (an error other than a "not found" error), the retry loop is stopped -func (r *DisruptionReconciler) waitForPodCreation(pod *corev1.Pod) error { - expBackoff := backoff.NewExponentialBackOff() - expBackoff.MaxInterval = time.Second - expBackoff.MaxElapsedTime = 30 * time.Second - - return backoff.Retry(func() error { - err := r.Client.Get(context.Background(), types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, pod) - if client.IgnoreNotFound(err) != nil { - return backoff.Permanent(err) - } - - return err - }, expBackoff) -} - // cleanDisruption triggers the cleanup of the given instance // for each existing 
chaos pod for the given instance, the function will delete the chaos pod to trigger its cleanup phase // the function returns true when no more chaos pods are existing (meaning that it keeps returning false if some pods // are deleted but still present) -func (r *DisruptionReconciler) cleanDisruption(instance *chaosv1beta1.Disruption) (bool, error) { +func (r *DisruptionReconciler) cleanDisruption(ctx context.Context, instance *chaosv1beta1.Disruption) (bool, error) { cleaned := true // get already existing chaos pods for the given disruption - chaosPods, err := r.getChaosPods(instance, nil) + chaosPods, err := r.ChaosPodService.GetChaosPodsOfDisruption(ctx, instance, nil) if err != nil { return false, err } @@ -676,62 +620,12 @@ func (r *DisruptionReconciler) cleanDisruption(instance *chaosv1beta1.Disruption // terminate running chaos pods to trigger cleanup for _, chaosPod := range chaosPods { - r.deleteChaosPod(instance, chaosPod) + r.deleteChaosPod(ctx, instance, chaosPod) } return cleaned, nil } -func (r *DisruptionReconciler) handleOrphanedChaosPods(req ctrl.Request) error { - ls := make(map[string]string) - - ls[chaostypes.DisruptionNameLabel] = req.Name - ls[chaostypes.DisruptionNamespaceLabel] = req.Namespace - - chaosPods, err := r.getChaosPods(nil, ls) - if err != nil { - return err - } - - for _, chaosPod := range chaosPods { - r.handleMetricSinkError(r.MetricsSink.MetricOrphanFound([]string{"disruption:" + req.Name, "chaosPod:" + chaosPod.Name, "namespace:" + req.Namespace})) - target := chaosPod.Labels[chaostypes.TargetLabel] - - var p corev1.Pod - - r.log.Infow("checking if we can clean up orphaned chaos pod", "chaosPod", chaosPod.Name, "target", target) - - // if target doesn't exist, we can try to clean up the chaos pod - if err := r.Client.Get(context.Background(), types.NamespacedName{Name: target, Namespace: req.Namespace}, &p); apierrors.IsNotFound(err) { - r.log.Warnw("orphaned chaos pod detected, will attempt to delete", "chaosPod", chaosPod.Name) - controllerutil.RemoveFinalizer(&chaosPod, chaostypes.ChaosPodFinalizer) - - if err := r.Client.Update(context.Background(), &chaosPod); err != nil { - if isModifiedError(err) { - r.log.Infow("retryable error removing chaos pod finalizer", "error", err, "chaosPod", chaosPod.Name) - } else { - r.log.Errorw("error removing chaos pod finalizer", "error", err, "chaosPod", chaosPod.Name) - } - - continue - } - - // if the chaos pod still exists after having its finalizer removed, delete it - if err := r.Client.Delete(context.Background(), &chaosPod); client.IgnoreNotFound(err) != nil { - if isModifiedError(err) { - r.log.Infow("retryable error deleting orphaned chaos pod", "error", err, "chaosPod", chaosPod.Name) - } else { - r.log.Errorw("error deleting orphaned chaos pod", "error", err, "chaosPod", chaosPod.Name) - } - - continue - } - } - } - - return nil -} - // handleChaosPodsTermination looks at the given instance chaos pods status to handle any terminated pods // such pods will have their finalizer removed, so they can be garbage collected by Kubernetes // the finalizer is removed if: @@ -742,9 +636,9 @@ func (r *DisruptionReconciler) handleOrphanedChaosPods(req ctrl.Request) error { // if a finalizer can't be removed because none of the conditions above are fulfilled, the instance is flagged // as stuck on removal and the pod finalizer won't be removed unless someone does it manually // the pod target will be moved to ignored targets, so it is not picked up by the next reconcile loop -func (r *DisruptionReconciler) 
handleChaosPodsTermination(instance *chaosv1beta1.Disruption) error { +func (r *DisruptionReconciler) handleChaosPodsTermination(ctx context.Context, instance *chaosv1beta1.Disruption) error { // get already existing chaos pods for the given disruption - chaosPods, err := r.getChaosPods(instance, nil) + chaosPods, err := r.ChaosPodService.GetChaosPodsOfDisruption(ctx, instance, nil) if err != nil { return err } @@ -754,111 +648,39 @@ func (r *DisruptionReconciler) handleChaosPodsTermination(instance *chaosv1beta1 } for _, chaosPod := range chaosPods { - r.handleChaosPodTermination(instance, chaosPod) + r.handleChaosPodTermination(ctx, instance, chaosPod) } - return r.Client.Status().Update(context.Background(), instance) + return r.Client.Status().Update(ctx, instance) } -func (r *DisruptionReconciler) handleChaosPodTermination(instance *chaosv1beta1.Disruption, chaosPod corev1.Pod) { - removeFinalizer := false - ignoreStatus := false - target := chaosPod.Labels[chaostypes.TargetLabel] - - // ignore chaos pods not being deleted or not having the finalizer anymore - if chaosPod.DeletionTimestamp.IsZero() || !controllerutil.ContainsFinalizer(&chaosPod, chaostypes.ChaosPodFinalizer) { +func (r *DisruptionReconciler) handleChaosPodTermination(ctx context.Context, instance *chaosv1beta1.Disruption, chaosPod corev1.Pod) { + // ignore chaos pods not being deleted + if chaosPod.DeletionTimestamp.IsZero() { return } - // check target readiness for cleanup - // ignore it if it is not ready anymore - err := r.TargetSelector.TargetIsHealthy(target, r.Client, instance) + isFinalizerRemoved, err := r.ChaosPodService.HandleChaosPodTermination(ctx, instance, &chaosPod) if err != nil { - if apierrors.IsNotFound(err) || strings.ToLower(err.Error()) == "pod is not running" || strings.ToLower(err.Error()) == "node is not ready" { - // if the target is not in a good shape, we still run the cleanup phase but we don't check for any issues happening during - // the cleanup to avoid blocking the disruption deletion for nothing - r.log.Infow("target is not likely to be cleaned (either it does not exist anymore or it is not ready), the injector will TRY to clean it but will not take care about any failures", "target", target) - - // by enabling this, we will remove the target associated chaos pods finalizers and delete them to trigger the cleanup phase - // but the chaos pods status will not be checked - ignoreStatus = true - } else { - r.log.Error(err.Error()) - - return - } - } + r.log.Errorw("could not handle the chaos pod termination", "error", err, "chaosPod", chaosPod.Name) - // It is always safe to remove some chaos pods. It is usually hard to tell if these chaos pods have - // succeeded or not, but they have no possibility of leaving side effects, so we choose to always remove the finalizer. 
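The branching deleted below now belongs to the ChaosPodService; the controller above only consumes its boolean outcome. A hedged sketch of that contract, with return semantics inferred from the new handleChaosPodTermination rather than stated by this patch:

	// Inferred contract of the service call (a sketch, not a documented API):
	//   (true, nil)  -> the chaos pod finalizer was safely removed, nothing left to do
	//   (false, nil) -> the pod cannot be cleaned safely; the caller marks the
	//                   disruption as stuck on removal and emits an event
	//   (_, err)     -> the failure is logged and the pod is reconsidered on a
	//                   later reconcile
	isFinalizerRemoved, err := r.ChaosPodService.HandleChaosPodTermination(ctx, instance, &chaosPod)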
- if chaosv1beta1.DisruptionHasNoSideEffects(chaosPod.Labels[chaostypes.DisruptionKindLabel]) { - removeFinalizer = true - ignoreStatus = true + return } - // check the chaos pod status to determine if we can safely delete it or not - switch chaosPod.Status.Phase { - case corev1.PodSucceeded, corev1.PodPending: - // pod has terminated or is pending - // we can remove the pod and the finalizer, so that it'll be garbage collected - removeFinalizer = true - case corev1.PodFailed: - // pod has failed - // we need to determine if we can remove it safely or if we need to block disruption deletion - // check if a container has been created (if not, the disruption was not injected) - if len(chaosPod.Status.ContainerStatuses) == 0 { - removeFinalizer = true - } - - // if the pod died only because it exceeded its activeDeadlineSeconds, we can remove the finalizer - if chaosPod.Status.Reason == "DeadlineExceeded" { - removeFinalizer = true - } - - // check if the container was able to start or not - // if not, we can safely delete the pod since the disruption was not injected - for _, cs := range chaosPod.Status.ContainerStatuses { - if cs.Name == "injector" { - if cs.State.Terminated != nil && cs.State.Terminated.Reason == "StartError" { - removeFinalizer = true - } - - break - } - } - default: - if !ignoreStatus { - // ignoring any pods not being in a "terminated" state - // if the target is not healthy, we clean up this pod regardless of its state - return - } + if isFinalizerRemoved { + return } - // remove the finalizer if possible or if we can ignore the cleanup status - if removeFinalizer || ignoreStatus { - r.log.Infow("chaos pod completed, removing finalizer", "target", target, "chaosPod", chaosPod.Name) + target := chaosPod.Labels[chaostypes.TargetLabel] - controllerutil.RemoveFinalizer(&chaosPod, chaostypes.ChaosPodFinalizer) + // if the chaos pod finalizer must not be removed and the chaos pod must not be deleted + // and the cleanup status must not be ignored, we are stuck and won't be able to remove the disruption + r.log.Infow("instance seems stuck on removal for this target, please check manually", "target", target, "chaosPod", chaosPod.Name) + r.recordEventOnDisruption(instance, chaosv1beta1.EventDisruptionStuckOnRemoval, "", target) - if err := r.Client.Update(context.Background(), &chaosPod); err != nil { - if strings.Contains(err.Error(), "latest version and try again") { - r.log.Debugw("cannot remove chaos pod finalizer, need to re-reconcile", "error", err) - } else { - r.log.Errorw("error removing chaos pod finalizer", "error", err, "chaosPod", chaosPod.Name) - } + instance.Status.IsStuckOnRemoval = true - return - } - } else { - // if the chaos pod finalizer must not be removed and the chaos pod must not be deleted - // and the cleanup status must not be ignored, we are stuck and won't be able to remove the disruption - r.log.Infow("instance seems stuck on removal for this target, please check manually", "target", target, "chaosPod", chaosPod.Name) - r.recordEventOnDisruption(instance, chaosv1beta1.EventDisruptionStuckOnRemoval, "", target) - - instance.Status.IsStuckOnRemoval = true - - r.updateTargetInjectionStatus(instance, chaosPod, chaostypes.DisruptionTargetInjectionStatusStatusIsStuckOnRemoval, *chaosPod.DeletionTimestamp) - } + r.updateTargetInjectionStatus(instance, chaosPod, chaostypes.DisruptionTargetInjectionStatusStatusIsStuckOnRemoval, *chaosPod.DeletionTimestamp) } func (r *DisruptionReconciler) updateTargetInjectionStatus(instance *chaosv1beta1.Disruption, 
chaosPod corev1.Pod, status chaostypes.DisruptionTargetInjectionStatus, since metav1.Time) { @@ -874,7 +696,7 @@ func (r *DisruptionReconciler) updateTargetInjectionStatus(instance *chaosv1beta // targets will only be selected once per instance // the chosen targets names will be reflected in the instance status // subsequent calls to this function will always return the same targets as the first call -func (r *DisruptionReconciler) selectTargets(instance *chaosv1beta1.Disruption) error { +func (r *DisruptionReconciler) selectTargets(ctx context.Context, instance *chaosv1beta1.Disruption) error { if len(instance.Status.TargetInjections) != 0 && instance.Spec.StaticTargeting { return nil } @@ -883,7 +705,7 @@ func (r *DisruptionReconciler) selectTargets(instance *chaosv1beta1.Disruption) // validate the given label selector to avoid any formatting issues due to special chars if instance.Spec.Selector != nil { - if err := validateLabelSelector(instance.Spec.Selector.AsSelector()); err != nil { + if err := targetselector.ValidateLabelSelector(instance.Spec.Selector.AsSelector()); err != nil { r.recordEventOnDisruption(instance, chaosv1beta1.EventInvalidDisruptionLabelSelector, err.Error(), "") return err @@ -898,13 +720,13 @@ func (r *DisruptionReconciler) selectTargets(instance *chaosv1beta1.Disruption) instance.Status.RemoveDeadTargets(matchingTargets) // instance.Spec.Count is a string that either represents a percentage or a value, we do the translation here - targetsCount, err := getScaledValueFromIntOrPercent(instance.Spec.Count, len(matchingTargets), true) + targetsCount, err := instance.GetTargetsCountAsInt(len(matchingTargets), true) if err != nil { targetsCount = instance.Spec.Count.IntValue() } // filter matching targets to only get eligible ones - eligibleTargets, err := r.getEligibleTargets(instance, matchingTargets) + eligibleTargets, err := r.getEligibleTargets(ctx, instance, matchingTargets) if err != nil { return fmt.Errorf("error getting eligible targets: %w", err) } @@ -940,7 +762,7 @@ func (r *DisruptionReconciler) selectTargets(instance *chaosv1beta1.Disruption) instance.Status.SelectedTargetsCount = len(instance.Status.TargetInjections) instance.Status.IgnoredTargetsCount = totalAvailableTargetsCount - targetsCount - return r.Client.Status().Update(context.Background(), instance) + return r.Client.Status().Update(ctx, instance) } // getMatchingTargets fetches all existing target fitting the disruption's selector @@ -986,239 +808,14 @@ func (r *DisruptionReconciler) getSelectorMatchingTargets(instance *chaosv1beta1 } // deleteChaosPods deletes a chaos pod using the client -func (r *DisruptionReconciler) deleteChaosPod(instance *chaosv1beta1.Disruption, chaosPod corev1.Pod) { +func (r *DisruptionReconciler) deleteChaosPod(ctx context.Context, instance *chaosv1beta1.Disruption, chaosPod corev1.Pod) { // delete the chaos pod only if it has not been deleted already if chaosPod.DeletionTimestamp.IsZero() { - r.log.Infow("terminating chaos pod to trigger cleanup", "chaosPod", chaosPod.Name) - - if err := r.Client.Delete(context.Background(), &chaosPod); client.IgnoreNotFound(err) != nil { - r.log.Errorw("error terminating chaos pod", "error", err, "chaosPod", chaosPod.Name) - } - - r.handleChaosPodTermination(instance, chaosPod) + r.ChaosPodService.DeletePod(ctx, chaosPod) + r.handleChaosPodTermination(ctx, instance, chaosPod) } } -func (r *DisruptionReconciler) getChaosPods(instance *chaosv1beta1.Disruption, ls labels.Set) ([]corev1.Pod, error) { - return 
chaosv1beta1.GetChaosPods(context.Background(), r.log, r.ChaosNamespace, r.Client, instance, ls) -} - -// generatePod generates a pod from a generic pod template in the same namespace -// and on the same node as the given pod -func (r *DisruptionReconciler) generatePod(instance *chaosv1beta1.Disruption, targetName string, targetNodeName string, args []string, kind chaostypes.DisruptionKindName) (pod corev1.Pod) { - // volume host path type definitions - hostPathDirectory := corev1.HostPathDirectory - hostPathFile := corev1.HostPathFile - - // The default TerminationGracePeriodSeconds is 30s. This can be too low for a chaos pod to finish cleaning. After TGPS passes, - // the signal sent to a pod becomes SIGKILL, which will interrupt any in-progress cleaning. By double this to 1 minute in the pod spec itself, - // ensures that whether a chaos pod is deleted directly or by deleting a disruption, it will have time to finish cleaning up after itself. - terminationGracePeriod := int64(60) - // Chaos pods will clean themselves automatically when duration expires, so we set activeDeadlineSeconds to ten seconds after that - // to give time for cleaning - activeDeadlineSeconds := int64(calculateRemainingDuration(*instance).Seconds()) + 10 - args = append(args, - "--deadline", time.Now().Add(calculateRemainingDuration(*instance)).Format(time.RFC3339)) - - podSpec := corev1.PodSpec{ - HostPID: true, // enable host pid - RestartPolicy: corev1.RestartPolicyNever, // do not restart the pod on fail or completion - NodeName: targetNodeName, // specify node name to schedule the pod - ServiceAccountName: r.InjectorServiceAccount, // service account to use - TerminationGracePeriodSeconds: &terminationGracePeriod, - ActiveDeadlineSeconds: &activeDeadlineSeconds, - Containers: []corev1.Container{ - { - Name: "injector", // container name - Image: r.InjectorImage, // container image gathered from controller flags - ImagePullPolicy: corev1.PullIfNotPresent, // pull the image only when it is not present - Args: args, // pass disruption arguments - SecurityContext: &corev1.SecurityContext{ - Privileged: func() *bool { b := true; return &b }(), // enable privileged mode - }, - ReadinessProbe: &corev1.Probe{ // define readiness probe (file created by the injector when the injection is successful) - PeriodSeconds: 1, - FailureThreshold: 5, - ProbeHandler: corev1.ProbeHandler{ - Exec: &corev1.ExecAction{ - Command: []string{"test", "-f", "/tmp/readiness_probe"}, - }, - }, - }, - Resources: corev1.ResourceRequirements{ // set resources requests and limits to zero - Limits: corev1.ResourceList{ - corev1.ResourceCPU: *resource.NewQuantity(0, resource.DecimalSI), - corev1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI), - }, - Requests: corev1.ResourceList{ - corev1.ResourceCPU: *resource.NewQuantity(0, resource.DecimalSI), - corev1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI), - }, - }, - Env: []corev1.EnvVar{ // define environment variables - { - Name: env.InjectorTargetPodHostIP, - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "status.hostIP", - }, - }, - }, - { - Name: env.InjectorChaosPodIP, - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "status.podIP", - }, - }, - }, - { - Name: env.InjectorPodName, - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "metadata.name", - }, - }, - }, - { - Name: env.InjectorMountHost, - Value: "/mnt/host/", - }, - { - Name: 
env.InjectorMountProc, - Value: "/mnt/host/proc/", - }, - { - Name: env.InjectorMountSysrq, - Value: "/mnt/sysrq", - }, - { - Name: env.InjectorMountSysrqTrigger, - Value: "/mnt/sysrq-trigger", - }, - { - Name: env.InjectorMountCgroup, - Value: "/mnt/cgroup/", - }, - }, - VolumeMounts: []corev1.VolumeMount{ // define volume mounts required for disruptions to work - { - Name: "run", - MountPath: "/run", - }, - { - Name: "sysrq", - MountPath: "/mnt/sysrq", - }, - { - Name: "sysrq-trigger", - MountPath: "/mnt/sysrq-trigger", - }, - { - Name: "cgroup", - MountPath: "/mnt/cgroup", - }, - { - Name: "host", - MountPath: "/mnt/host", - ReadOnly: true, - }, - }, - }, - }, - Volumes: []corev1.Volume{ // declare volumes required for disruptions to work - { - Name: "run", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/run", - Type: &hostPathDirectory, - }, - }, - }, - { - Name: "proc", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/proc", - Type: &hostPathDirectory, - }, - }, - }, - { - Name: "sysrq", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/proc/sys/kernel/sysrq", - Type: &hostPathFile, - }, - }, - }, - { - Name: "sysrq-trigger", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/proc/sysrq-trigger", - Type: &hostPathFile, - }, - }, - }, - { - Name: "cgroup", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/sys/fs/cgroup", - Type: &hostPathDirectory, - }, - }, - }, - { - Name: "host", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/", - Type: &hostPathDirectory, - }, - }, - }, - }, - } - - if r.ImagePullSecrets != "" { - podSpec.ImagePullSecrets = []corev1.LocalObjectReference{ - { - Name: r.ImagePullSecrets, - }, - } - } - - podLabels := make(map[string]string) - for k, v := range r.InjectorLabels { - podLabels[k] = v - } - - podLabels[chaostypes.TargetLabel] = targetName // target name label - podLabels[chaostypes.DisruptionKindLabel] = string(kind) // disruption kind label - podLabels[chaostypes.DisruptionNameLabel] = instance.Name // disruption name label, used to determine ownership - podLabels[chaostypes.DisruptionNamespaceLabel] = instance.Namespace // disruption namespace label, used to determine ownership - - // define injector pod - pod = corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: fmt.Sprintf("chaos-%s-", instance.Name), // generate the pod name automatically with a prefix - Namespace: r.ChaosNamespace, // chaos pods need to be in the same namespace as their service account to run - Annotations: r.InjectorAnnotations, // add extra annotations passed to the controller - Labels: podLabels, // add default and extra podLabels passed to the controller - }, - Spec: podSpec, - } - - // add finalizer to the pod so it is not deleted before we can control its exit status - controllerutil.AddFinalizer(&pod, chaostypes.ChaosPodFinalizer) - - return pod -} - // handleMetricSinkError logs the given metric sink error if it is not nil func (r *DisruptionReconciler) handleMetricSinkError(err error) { if err != nil { @@ -1260,87 +857,15 @@ func (r *DisruptionReconciler) validateDisruptionSpec(instance *chaosv1beta1.Dis return nil } -// generateChaosPods generates a chaos pod for the given instance and disruption kind if set -func (r *DisruptionReconciler) generateChaosPods(instance *chaosv1beta1.Disruption, targetName string, targetNodeName 
string, targetContainers map[string]string, targetPodIP string) ([]corev1.Pod, error) { - pods := []corev1.Pod{} - - // generate chaos pods for each possible disruptions - for _, kind := range chaostypes.DisruptionKindNames { - subspec := instance.Spec.DisruptionKindPicker(kind) - if reflect.ValueOf(subspec).IsNil() { - continue - } - - pulseActiveDuration, pulseDormantDuration, pulseInitialDelay := time.Duration(0), time.Duration(0), time.Duration(0) - if instance.Spec.Pulse != nil { - pulseInitialDelay = instance.Spec.Pulse.InitialDelay.Duration() - pulseActiveDuration = instance.Spec.Pulse.ActiveDuration.Duration() - pulseDormantDuration = instance.Spec.Pulse.DormantDuration.Duration() - } - - notInjectedBefore := TimeToInject(instance.Spec.Triggers, instance.CreationTimestamp.Time) - - allowedHosts := r.InjectorNetworkDisruptionAllowedHosts - - // get the ip ranges of cloud provider services - if instance.Spec.Network != nil { - if instance.Spec.Network.Cloud != nil { - hosts, err := transformCloudSpecToHostsSpec(r.CloudServicesProvidersManager, instance.Spec.Network.Cloud) - if err != nil { - return nil, err - } - - instance.Spec.Network.Hosts = append(instance.Spec.Network.Hosts, hosts...) - } - - // remove default allowed hosts if disabled - if instance.Spec.Network.DisableDefaultAllowedHosts { - allowedHosts = make([]string, 0) - } - } - - xargs := chaosapi.DisruptionArgs{ - Level: instance.Spec.Level, - Kind: kind, - TargetContainers: targetContainers, - TargetName: targetName, - TargetNodeName: targetNodeName, - TargetPodIP: targetPodIP, - DryRun: instance.Spec.DryRun, - DisruptionName: instance.Name, - DisruptionNamespace: instance.Namespace, - OnInit: instance.Spec.OnInit, - PulseInitialDelay: pulseInitialDelay, - PulseActiveDuration: pulseActiveDuration, - PulseDormantDuration: pulseDormantDuration, - NotInjectedBefore: notInjectedBefore, - MetricsSink: r.MetricsSink.GetSinkName(), - AllowedHosts: allowedHosts, - DNSServer: r.InjectorDNSDisruptionDNSServer, - KubeDNS: r.InjectorDNSDisruptionKubeDNS, - ChaosNamespace: r.ChaosNamespace, - } - - // generate args for pod - args := xargs.CreateCmdArgs(subspec.GenerateArgs()) - - // append pod to chaos pods - pod := r.generatePod(instance, targetName, targetNodeName, args, kind) - pods = append(pods, pod) - } - - return pods, nil -} - // recordEventOnTarget records an event on the given target which can be either a pod or a node depending on the given disruption level -func (r *DisruptionReconciler) recordEventOnTarget(instance *chaosv1beta1.Disruption, target string, disruptionEventReason chaosv1beta1.DisruptionEventReason, chaosPod, optionalMessage string) { +func (r *DisruptionReconciler) recordEventOnTarget(ctx context.Context, instance *chaosv1beta1.Disruption, target string, disruptionEventReason chaosv1beta1.DisruptionEventReason, chaosPod, optionalMessage string) { var o runtime.Object switch instance.Spec.Level { case chaostypes.DisruptionLevelPod: p := &corev1.Pod{} - if err := r.Client.Get(context.Background(), types.NamespacedName{Namespace: instance.Namespace, Name: target}, p); err != nil { + if err := r.Client.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: target}, p); err != nil { r.log.Errorw("event failed to be registered on target", "error", err, "target", target) } @@ -1348,7 +873,7 @@ func (r *DisruptionReconciler) recordEventOnTarget(instance *chaosv1beta1.Disrup case chaostypes.DisruptionLevelNode: n := &corev1.Node{} - if err := r.Client.Get(context.Background(), types.NamespacedName{Name: 
target}, n); err != nil { + if err := r.Client.Get(ctx, types.NamespacedName{Name: target}, n); err != nil { r.log.Errorw("event failed to be registered on target", "error", err, "target", target) } @@ -1425,7 +950,7 @@ func shouldTriggerReconcile(o client.Object) bool { // ReportMetrics reports some controller metrics every minute: // - stuck on removal disruptions count // - ongoing disruptions count -func (r *DisruptionReconciler) ReportMetrics() { +func (r *DisruptionReconciler) ReportMetrics(ctx context.Context) { for { // wait for a minute <-time.After(time.Minute) @@ -1437,7 +962,7 @@ func (r *DisruptionReconciler) ReportMetrics() { l := chaosv1beta1.DisruptionList{} // list disruptions - if err := r.Client.List(context.Background(), &l); err != nil { + if err := r.Client.List(ctx, &l); err != nil { r.BaseLog.Errorw("error listing disruptions", "error", err) continue } @@ -1452,7 +977,7 @@ func (r *DisruptionReconciler) ReportMetrics() { } } - chaosPods, err := r.getChaosPods(&d, nil) + chaosPods, err := r.ChaosPodService.GetChaosPodsOfDisruption(ctx, &d, nil) if err != nil { r.BaseLog.Errorw("error listing chaos pods to send pods.gauge metric", "error", err) } @@ -1480,3 +1005,69 @@ func (r *DisruptionReconciler) ReportMetrics() { } } } + +// getEligibleTargets returns targets which can be targeted by the given instance from the given targets pool +// it skips ignored targets and targets being already targeted by another disruption +func (r *DisruptionReconciler) getEligibleTargets(ctx context.Context, instance *chaosv1beta1.Disruption, potentialTargets []string) (eligibleTargets chaosv1beta1.TargetInjections, err error) { + defer func() { + r.log.Debugw("getting eligible targets for disruption injection", "potentialTargets", potentialTargets, "eligibleTargets", eligibleTargets, "error", err) + }() + + eligibleTargets = make(chaosv1beta1.TargetInjections) + + for _, target := range potentialTargets { + // skip current targets + if instance.Status.HasTarget(target) { + continue + } + + targetLabels := map[string]string{ + chaostypes.TargetLabel: target, // filter with target name + } + + if instance.Spec.Level == chaostypes.DisruptionLevelPod { // nodes aren't namespaced and thus should only check by target name + targetLabels[chaostypes.DisruptionNamespaceLabel] = instance.Namespace // filter with current instance namespace (to avoid getting pods having the same name but living in different namespaces) + } + + chaosPods, err := r.ChaosPodService.GetChaosPodsOfDisruption(ctx, nil, targetLabels) + if err != nil { + return nil, fmt.Errorf("error getting chaos pods targeting the given target (%s): %w", target, err) + } + + // skip targets already targeted by a chaos pod from another disruption with the same kind if any + if len(chaosPods) != 0 { + if !instance.Spec.AllowDisruptedTargets { + r.log.Infow(`disruption spec does not allow to use already disrupted targets with ANY kind of existing disruption, skipping... 
+NB: you can specify "spec.allowDisruptedTargets: true" to allow a new disruption without any disruption kind intersection to target the same pod`, "target", target, "targetLabels", targetLabels) + + continue + } + + targetDisruptedByKinds := map[chaostypes.DisruptionKindName]string{} + for _, chaosPod := range chaosPods { + targetDisruptedByKinds[chaostypes.DisruptionKindName(chaosPod.Labels[chaostypes.DisruptionKindLabel])] = chaosPod.Name + } + + intersectionOfKinds := []string{} + + for _, kind := range instance.Spec.KindNames() { + if chaosPodName, ok := targetDisruptedByKinds[kind]; ok { + intersectionOfKinds = append(intersectionOfKinds, fmt.Sprintf("kind:%s applied by chaos-pod:%s", kind, chaosPodName)) + } + } + + if len(intersectionOfKinds) != 0 { + r.log.Infow("target is already disrupted by at least one provided kind, skipping", "target", target, "targetLabels", targetLabels, "targetDisruptedByKinds", targetDisruptedByKinds, "intersectionOfKinds", intersectionOfKinds) + + continue + } + } + + // add target if eligible + eligibleTargets[target] = chaosv1beta1.TargetInjection{ + InjectionStatus: chaostypes.DisruptionTargetInjectionStatusNotInjected, + } + } + + return eligibleTargets, nil +} diff --git a/controllers/helpers.go b/controllers/helpers.go deleted file mode 100644 index bc16487da..000000000 --- a/controllers/helpers.go +++ /dev/null @@ -1,291 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2023 Datadog, Inc. - -package controllers - -import ( - "fmt" - "math" - "regexp" - "strings" - "time" - - chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" - "github.com/DataDog/chaos-controller/cloudservice" - cloudtypes "github.com/DataDog/chaos-controller/cloudservice/types" - chaostypes "github.com/DataDog/chaos-controller/types" - corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/util/intstr" -) - -// This function returns a scaled value from an IntOrString type. If the IntOrString -// is a percentage string value it's treated as a percentage and scaled appropriately -// in accordance to the total, if it's an int value it's treated as a a simple value and -// if it is a string value which is either non-numeric or numeric but lacking a trailing '%' it returns an error. 
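To make the rounding rule concrete, here is a minimal, self-contained sketch of the behavior the helper deleted just below implements (illustrative only: the real helper first resolves the raw value and percent flag through GetIntOrPercentValueSafely, and the totals used here are arbitrary):

package main

import (
	"fmt"
	"math"
)

// scaledValue applies the scaling rule described above: a percentage is scaled
// against the total (rounding up or down as requested), while an absolute
// integer value passes through unchanged.
func scaledValue(value int, isPercent, roundUp bool, total int) int {
	if !isPercent {
		return value
	}

	if roundUp {
		return int(math.Ceil(float64(value) * float64(total) / 100))
	}

	return int(math.Floor(float64(value) * float64(total) / 100))
}

func main() {
	fmt.Println(scaledValue(50, true, true, 7))  // "50%" of 7 targets, rounded up: 4
	fmt.Println(scaledValue(50, true, false, 7)) // "50%" of 7 targets, rounded down: 3
	fmt.Println(scaledValue(5, false, true, 7))  // absolute count of 5, returned as-is
}
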
-func getScaledValueFromIntOrPercent(intOrPercent *intstr.IntOrString, total int, roundUp bool) (int, error) { - if intOrPercent == nil { - return 0, k8serrors.NewBadRequest("nil value for IntOrString") - } - - value, isPercent, err := chaosv1beta1.GetIntOrPercentValueSafely(intOrPercent) - if err != nil { - return 0, fmt.Errorf("invalid value for IntOrString: %w", err) - } - - if isPercent { - if roundUp { - value = int(math.Ceil(float64(value) * (float64(total)) / 100)) - } else { - value = int(math.Floor(float64(value) * (float64(total)) / 100)) - } - } - - return value, nil -} - -type terminationStatus uint8 - -const ( - tsNotTerminated terminationStatus = iota - tsTemporarilyTerminated - tsDefinitivelyTerminated -) - -// disruptionTerminationStatus determines if the disruption injection is temporarily or definitively terminated -// disruption can enter a temporary injection removal state when all targets have disappeared (due to rollout or manual deletion) -// disruption will enter a definitive ended state when remaining duration is over or has been deleted -func disruptionTerminationStatus(instance chaosv1beta1.Disruption, chaosPods []corev1.Pod) terminationStatus { - // a not yet created disruption is neither temporary nor definitively ended - if instance.CreationTimestamp.IsZero() { - return tsNotTerminated - } - - // a definitive state (expired duration or deletion) imply a definitively deleted injection - // and should be returned prior to a temporarily terminated state - if calculateRemainingDuration(instance) <= 0 || !instance.DeletionTimestamp.IsZero() { - return tsDefinitivelyTerminated - } - - if len(chaosPods) == 0 { - // we were never injected, we are hence not terminated if we reach here - if instance.Status.InjectionStatus.NeverInjected() { - return tsNotTerminated - } - - // we were injected before hence temporarily not terminated - return tsTemporarilyTerminated - } - - // if all pods exited successfully, we can consider the disruption is ended already - // it can be caused by either an appromixative date sync (in a distributed infra it's hard) - // or by deletion of targets leading to deletion of injectors - // injection terminated with an error are considered NOT terminated - for _, chaosPod := range chaosPods { - for _, containerStatuses := range chaosPod.Status.ContainerStatuses { - if containerStatuses.State.Terminated == nil || containerStatuses.State.Terminated.ExitCode != 0 { - return tsNotTerminated - } - } - } - - // this MIGHT be a temporary status, that could become definitive once disruption is expired or deleted - return tsTemporarilyTerminated -} - -func calculateRemainingDuration(instance chaosv1beta1.Disruption) time.Duration { - return calculateDeadline( - instance.Spec.Duration.Duration(), - TimeToInject(instance.Spec.Triggers, instance.ObjectMeta.CreationTimestamp.Time), - ) -} - -// returned value can be negative if deadline is in the past -func calculateDeadline(duration time.Duration, creationTime time.Time) time.Duration { - // first we must calculate the timout from when the disruption was created, not from now - timeout := creationTime.Add(duration) - now := time.Now() // rather not take the risk that the time changes by a second during this function - - // return the number of seconds between now and the deadline - return timeout.Sub(now) -} - -// assert label selector matches valid grammar, avoids CORE-414 -func validateLabelSelector(selector labels.Selector) error { - labelGrammar := "([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]" - rgx := 
regexp.MustCompile(labelGrammar) - - if !rgx.MatchString(selector.String()) { - return fmt.Errorf("given label selector is invalid, it does not match valid selector grammar: %s %s", selector.String(), labelGrammar) - } - - return nil -} - -// transformCloudSpecToHostsSpec from a cloud spec disruption, get all ip ranges of services provided and transform them into a list of hosts spec -func transformCloudSpecToHostsSpec(cloudManager *cloudservice.CloudServicesProvidersManager, cloudSpec *chaosv1beta1.NetworkDisruptionCloudSpec) ([]chaosv1beta1.NetworkDisruptionHostSpec, error) { - hosts := []chaosv1beta1.NetworkDisruptionHostSpec{} - clouds := cloudSpec.TransformToCloudMap() - - for cloudName, serviceList := range clouds { - serviceListNames := []string{} - - for _, service := range serviceList { - serviceListNames = append(serviceListNames, service.ServiceName) - } - - ipRangesPerService, err := cloudManager.GetServicesIPRanges(cloudtypes.CloudProviderName(cloudName), serviceListNames) - if err != nil { - return nil, err - } - - for _, serviceSpec := range serviceList { - for _, ipRange := range ipRangesPerService[serviceSpec.ServiceName] { - hosts = append(hosts, chaosv1beta1.NetworkDisruptionHostSpec{ - Host: ipRange, - Protocol: serviceSpec.Protocol, - Flow: serviceSpec.Flow, - ConnState: serviceSpec.ConnState, - }) - } - } - } - - return hosts, nil -} - -// isModifiedError tells us if this error is of the form: -// "Operation cannot be fulfilled on disruptions.chaos.datadoghq.com "chaos-network-drop": the object has been modified; please apply your changes to the latest version and try again" -// Sadly this doesn't seem to be one of the errors checkable with a function from "k8s.io/apimachinery/pkg/api/errors" -// So we parse the error message directly -func isModifiedError(err error) bool { - return strings.Contains(err.Error(), "please apply your changes to the latest version and try again") -} - -// TimeToCreatePods takes the DisruptionTriggers field from a Disruption spec, along with the time.Time at which that disruption was created -// It returns the earliest time.Time at which the chaos-controller should begin creating chaos pods, given the specified DisruptionTriggers -func TimeToCreatePods(triggers chaosv1beta1.DisruptionTriggers, creationTimestamp time.Time) time.Time { - if triggers.IsZero() { - return creationTimestamp - } - - if triggers.CreatePods.IsZero() { - return creationTimestamp - } - - var noPodsBefore time.Time - - // validation should have already prevented a situation where both Offset and NotBefore are set - if !triggers.CreatePods.NotBefore.IsZero() { - noPodsBefore = triggers.CreatePods.NotBefore.Time - } - - if triggers.CreatePods.Offset.Duration() > 0 { - noPodsBefore = creationTimestamp.Add(triggers.CreatePods.Offset.Duration()) - } - - if creationTimestamp.After(noPodsBefore) { - return creationTimestamp - } - - return noPodsBefore -} - -// TimeToInject takes the DisruptionTriggers field from a Disruption spec, along with the time.Time at which that disruption was created -// It returns the earliest time.Time at which chaos pods should inject into their targets, given the specified DisruptionTriggers -func TimeToInject(triggers chaosv1beta1.DisruptionTriggers, creationTimestamp time.Time) time.Time { - if triggers.IsZero() { - return creationTimestamp - } - - if triggers.Inject.IsZero() { - return TimeToCreatePods(triggers, creationTimestamp) - } - - var notInjectedBefore time.Time - - // validation should have already prevented a situation where both 
Offset and NotBefore are set - if !triggers.Inject.NotBefore.IsZero() { - notInjectedBefore = triggers.Inject.NotBefore.Time - } - - if triggers.Inject.Offset.Duration() > 0 { - // We measure the offset from the latter of two timestamps: creationTimestamp of the disruption, and spec.trigger.createPods - notInjectedBefore = TimeToCreatePods(triggers, creationTimestamp).Add(triggers.Inject.Offset.Duration()) - } - - if creationTimestamp.After(notInjectedBefore) { - return creationTimestamp - } - - return notInjectedBefore -} - -// getEligibleTargets returns targets which can be targeted by the given instance from the given targets pool -// it skips ignored targets and targets being already targeted by another disruption -func (r *DisruptionReconciler) getEligibleTargets(instance *chaosv1beta1.Disruption, potentialTargets []string) (eligibleTargets chaosv1beta1.TargetInjections, err error) { - defer func() { - r.log.Debugw("getting eligible targets for disruption injection", "potential_targets", potentialTargets, "eligible_targets", eligibleTargets, "error", err) - }() - - eligibleTargets = make(chaosv1beta1.TargetInjections) - - for _, target := range potentialTargets { - // skip current targets - if instance.Status.HasTarget(target) { - continue - } - - targetLabels := map[string]string{ - chaostypes.TargetLabel: target, // filter with target name - } - - if instance.Spec.Level == chaostypes.DisruptionLevelPod { // nodes aren't namespaced and thus should only check by target name - targetLabels[chaostypes.DisruptionNamespaceLabel] = instance.Namespace // filter with current instance namespace (to avoid getting pods having the same name but living in different namespaces) - } - - chaosPods, err := r.getChaosPods(nil, targetLabels) - if err != nil { - return nil, fmt.Errorf("error getting chaos pods targeting the given target (%s): %w", target, err) - } - - // skip targets already targeted by a chaos pod from another disruption with the same kind if any - if len(chaosPods) != 0 { - if !instance.Spec.AllowDisruptedTargets { - r.log.Infow(`disruption spec does not allow to use already disrupted targets with ANY kind of existing disruption, skipping... 
-NB: you can specify "spec.allowDisruptedTargets: true" to allow a new disruption without any disruption kind intersection to target the same pod`, "target", target, "target_labels", targetLabels) - - continue - } - - targetDisruptedByKinds := map[chaostypes.DisruptionKindName]string{} - for _, chaosPod := range chaosPods { - targetDisruptedByKinds[chaostypes.DisruptionKindName(chaosPod.Labels[chaostypes.DisruptionKindLabel])] = chaosPod.Name - } - - intersectionOfKinds := []string{} - - for _, kind := range instance.Spec.KindNames() { - if chaosPodName, ok := targetDisruptedByKinds[kind]; ok { - intersectionOfKinds = append(intersectionOfKinds, fmt.Sprintf("kind:%s applied by chaos-pod:%s", kind, chaosPodName)) - } - } - - if len(intersectionOfKinds) != 0 { - r.log.Infow("target is already disrupted by at least one provided kind, skipping", "target", target, "target_labels", targetLabels, "target_disrupted_by_kinds", targetDisruptedByKinds, "intersection_of_kinds", intersectionOfKinds) - - continue - } - } - - // add target if eligible - eligibleTargets[target] = chaosv1beta1.TargetInjection{ - InjectionStatus: chaostypes.DisruptionTargetInjectionStatusNotInjected, - } - } - - return eligibleTargets, nil -} diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go deleted file mode 100644 index d66ded62c..000000000 --- a/controllers/helpers_test.go +++ /dev/null @@ -1,369 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2023 Datadog, Inc. - -package controllers - -import ( - "time" - - chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" - chaostypes "github.com/DataDog/chaos-controller/types" - - "github.com/DataDog/chaos-controller/api/v1beta1" - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" -) - -var _ = Describe("Label Selector Validation", func() { - Context("validating an empty label selector", func() { - It("", func() { - selector := labels.Set{} - Expect(validateLabelSelector(selector.AsSelector())).ToNot(Succeed()) - }) - }) - Context("validating a good label selector", func() { - It("", func() { - selector := labels.Set{"foo": "bar"} - Expect(validateLabelSelector(selector.AsSelector())).To(Succeed()) - }) - }) - Context("validating special characters in label selector", func() { - It("", func() { - selector := labels.Set{"foo": "”bar”"} - //.AsSelector() should strip invalid characters - Expect(validateLabelSelector(selector.AsSelector())).To(Succeed()) - }) - }) - Context("validating too many quotes in label selector", func() { - It("", func() { - selector := labels.Set{"foo": "\"bar\""} - //.AsSelector() should strip invalid characters - Expect(validateLabelSelector(selector.AsSelector())).To(Succeed()) - }) - }) -}) - -var _ = Describe("Inject and CreatePods Trigger tests", func() { - var creationTimestamp time.Time - - BeforeEach(func() { - creationTimestamp = time.Now() - }) - - Context("TimeToCreatePods", func() { - It("should return creationTimestamp if triggers is nil", func() { - var triggers v1beta1.DisruptionTriggers - - Expect(TimeToCreatePods(triggers, creationTimestamp)).Should(Equal(creationTimestamp)) - }) - - It("should return creationTimestamp if triggers.createPods is nil", func() { - triggers := v1beta1.DisruptionTriggers{ - Inject: v1beta1.DisruptionTrigger{ - Offset: "15m", - }, - } - - Expect(TimeToCreatePods(triggers, creationTimestamp)).Should(Equal(creationTimestamp)) - }) - - It("should return createPods.notBefore if set", func() { - notBefore := time.Now().Add(time.Minute) - triggers := v1beta1.DisruptionTriggers{ - Inject: v1beta1.DisruptionTrigger{ - Offset: "15m", - }, - CreatePods: v1beta1.DisruptionTrigger{ - NotBefore: metav1.NewTime(notBefore), - Offset: "", - }, - } - - Expect(TimeToCreatePods(triggers, creationTimestamp)).Should(Equal(notBefore)) - }) - - It("should return a time after creationTimestamp if createPods.offset is set", func() { - offsetTime := creationTimestamp.Add(time.Minute * 5) - triggers := v1beta1.DisruptionTriggers{ - CreatePods: v1beta1.DisruptionTrigger{ - NotBefore: metav1.Time{}, - Offset: "5m", - }, - } - - Expect(TimeToCreatePods(triggers, creationTimestamp)).Should(Equal(offsetTime)) - }) - }) - - Context("TimeToInject", func() { - It("should return creationTimestamp if triggers is nil", func() { - var triggers v1beta1.DisruptionTriggers - - Expect(TimeToInject(triggers, creationTimestamp)).Should(Equal(creationTimestamp)) - }) - - It("should return triggers.createPods if triggers.inject is nil", func() { - notBefore := time.Now().Add(time.Minute) - triggers := v1beta1.DisruptionTriggers{ - CreatePods: v1beta1.DisruptionTrigger{ - NotBefore: metav1.NewTime(notBefore), - Offset: "", - }, - } - - Expect(TimeToInject(triggers, creationTimestamp)).Should(Equal(notBefore)) - }) - - It("should return inject.notBefore if set", func() { - notBefore := time.Now().Add(time.Minute) - triggers := v1beta1.DisruptionTriggers{ - Inject: v1beta1.DisruptionTrigger{ - NotBefore: metav1.NewTime(notBefore), - Offset: "1", - }, - CreatePods: v1beta1.DisruptionTrigger{ - NotBefore: metav1.Time{}, - Offset: "2m", - }, - } - - 
Expect(TimeToInject(triggers, creationTimestamp)).Should(Equal(notBefore)) - }) - - It("should return a time after creationTimestamp if inject.offset is set", func() { - offsetTime := creationTimestamp.Add(time.Minute) - triggers := v1beta1.DisruptionTriggers{ - Inject: v1beta1.DisruptionTrigger{ - NotBefore: metav1.Time{}, - Offset: "1m", - }, - } - - Expect(TimeToInject(triggers, creationTimestamp)).Should(Equal(offsetTime)) - }) - }) -}) - -var _ = DescribeTable( - "disruptionTerminationStatus", - func(disruption *disruptionBuilder, pods podsBuilder, expectTerminationStatus terminationStatus) { - Expect(disruptionTerminationStatus(disruption.Build(), pods.Build())).To(Equal(expectTerminationStatus)) - }, - Entry( - "not yet created disruption IS NOT terminated", - newDisruptionBuilder().Reset(), - nil, - tsNotTerminated, - ), - Entry( - "1s before deadline, disruption IS NOT terminated", - newDisruptionBuilder().WithCreation(time.Minute-time.Second), - newPodsBuilder(), - tsNotTerminated, - ), - Entry( - "1s after deadline, disruption IS definitively terminated", - newDisruptionBuilder().WithCreation(time.Minute+time.Second), - newPodsBuilder(), - tsDefinitivelyTerminated, - ), - Entry( - "half duration disruption IS NOT terminated", - newDisruptionBuilder(), - newPodsBuilder(), - tsNotTerminated, - ), - Entry( - "at deadline, disruption IS definitively terminated (however even ns before it is not)", - newDisruptionBuilder().WithCreation(time.Minute), - newPodsBuilder(), - tsDefinitivelyTerminated, - ), - Entry( - "deleted disruption IS definitively terminated", - newDisruptionBuilder().WithCreation(time.Minute).WithDeletion(), - newPodsBuilder(), - tsDefinitivelyTerminated, - ), - Entry( - "one chaos pod exited out of two IS NOT terminated", - newDisruptionBuilder(), - newPodsBuilder().One().Terminated().Parent(), - tsNotTerminated, - ), - Entry( - "all chaos pods exited IS temporarily terminated", - newDisruptionBuilder(), - newPodsBuilder().One().Terminated().Parent().Two().Terminated().Parent(), - tsTemporarilyTerminated, - ), - Entry( - "no pod injected is temporarily terminated", - newDisruptionBuilder().WithInjectionStatus(chaostypes.DisruptionInjectionStatusInjected), - nil, - tsTemporarilyTerminated, - ), - Entry( - "no pod partially injected is temporarily terminated", - newDisruptionBuilder().WithInjectionStatus(chaostypes.DisruptionInjectionStatusPartiallyInjected), - nil, - tsTemporarilyTerminated, - ), - Entry( - "no pod NOT injected is not terminated", - newDisruptionBuilder().WithInjectionStatus(chaostypes.DisruptionInjectionStatusNotInjected), - nil, - tsNotTerminated, - ), - Entry( - "no pod initial injection status is not terminated", - newDisruptionBuilder(), - nil, - tsNotTerminated, - ), -) - -type disruptionBuilder struct { - *chaosv1beta1.Disruption - // we store action we want to perform instead of performing them right away because they are time sensititive - // this enables us to ensure time.Now is as late as it can be without faking it (that we should do at some point) - modifiers []func() -} - -func newDisruptionBuilder() *disruptionBuilder { - return (&disruptionBuilder{ - Disruption: &chaosv1beta1.Disruption{ - Spec: chaosv1beta1.DisruptionSpec{ - Duration: "1m", // per spec definition a valid disruption going to the reconcile loop MUST have a duration, let's not test wrong test cases - }, - }, - }).WithCreation(30 * time.Second) -} - -func (b *disruptionBuilder) Build() chaosv1beta1.Disruption { - for _, modifier := range b.modifiers { - modifier() - } - 
- return *b.Disruption -} - -func (b *disruptionBuilder) Reset() *disruptionBuilder { - b.modifiers = nil - - return b -} - -func (b *disruptionBuilder) WithCreation(past time.Duration) *disruptionBuilder { - b.modifiers = append( - b.modifiers, - func() { - b.CreationTimestamp = v1.NewTime(time.Now().Add(-past)) - }) - - return b -} - -func (b *disruptionBuilder) WithDeletion() *disruptionBuilder { - b.modifiers = append( - b.modifiers, - func() { - v1t := v1.NewTime(time.Now()) - - b.DeletionTimestamp = &v1t - }) - - return b -} - -func (b *disruptionBuilder) WithInjectionStatus(status chaostypes.DisruptionInjectionStatus) *disruptionBuilder { - b.Status.InjectionStatus = status - - return b -} - -type podsBuilder []*podBuilder - -type podBuilder struct { - *corev1.Pod - parent podsBuilder -} - -func newPodsBuilder() podsBuilder { - return podsBuilder{ - { - Pod: &corev1.Pod{ - Status: corev1.PodStatus{ - ContainerStatuses: []corev1.ContainerStatus{ - { - State: corev1.ContainerState{}, - }, - }, - }, - }, - }, - { - Pod: &corev1.Pod{ - Status: corev1.PodStatus{ - ContainerStatuses: []corev1.ContainerStatus{ - { - State: corev1.ContainerState{}, - }, - }, - }, - }, - }, - } -} - -func (p podsBuilder) Build() []corev1.Pod { - if p == nil { - return nil - } - - pods := make([]corev1.Pod, 0, len(p)) - - for _, pod := range p { - pods = append(pods, *pod.Pod) - } - - return pods -} - -func (p podsBuilder) Take(index int) *podBuilder { - if p[index].parent == nil { - p[index].parent = p - } - - return p[index] -} - -func (p podsBuilder) One() *podBuilder { - return p.Take(0) -} - -func (p podsBuilder) Two() *podBuilder { - return p.Take(1) -} - -func (p *podBuilder) Parent() podsBuilder { - return p.parent -} - -func (p *podBuilder) TerminatedWith(exitCode int32) *podBuilder { - p.Pod.Status.ContainerStatuses[0].State.Terminated = &corev1.ContainerStateTerminated{ - ExitCode: exitCode, - } - - return p -} - -func (p *podBuilder) Terminated() *podBuilder { - return p.TerminatedWith(0) -} diff --git a/controllers/suite_toolsfor_test.go b/controllers/suite_toolsfor_test.go index 878b5ca1a..89810867c 100644 --- a/controllers/suite_toolsfor_test.go +++ b/controllers/suite_toolsfor_test.go @@ -10,12 +10,11 @@ import ( "strings" "time" + chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" + chaostypes "github.com/DataDog/chaos-controller/types" "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - - chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" - chaostypes "github.com/DataDog/chaos-controller/types" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -86,7 +85,7 @@ func calcDisruptionGoneTimeout(disruption chaosv1beta1.Disruption) time.Duration Fail("an existing disruption should have a non-zero duration") } - if remainingDisruptionDuration := calculateRemainingDuration(disruption); remainingDisruptionDuration > 0 { + if remainingDisruptionDuration := disruption.RemainingDuration(); remainingDisruptionDuration > 0 { disruptionDuration = remainingDisruptionDuration } } @@ -422,7 +421,7 @@ func PickFirstChaodPod(ctx SpecContext, disruption chaosv1beta1.Disruption) core func ExpectChaosPodToDisappear(ctx SpecContext, chaosPodKey types.NamespacedName, disruption chaosv1beta1.Disruption) { Eventually(k8sClient.Get). WithContext(ctx).WithArguments(chaosPodKey, &corev1.Pod{}). - Within(calculateRemainingDuration(disruption)).ProbeEvery(disruptionPotentialChangesEvery). 
+ Within(disruption.RemainingDuration()).ProbeEvery(disruptionPotentialChangesEvery). Should(WithTransform(apierrors.IsNotFound, BeTrue())) } diff --git a/main.go b/main.go index 9538bdb9e..ec3beb58f 100644 --- a/main.go +++ b/main.go @@ -24,6 +24,7 @@ import ( profilertypes "github.com/DataDog/chaos-controller/o11y/profiler/types" "github.com/DataDog/chaos-controller/o11y/tracer" tracertypes "github.com/DataDog/chaos-controller/o11y/tracer/types" + "github.com/DataDog/chaos-controller/services" "github.com/DataDog/chaos-controller/targetselector" "github.com/DataDog/chaos-controller/utils" "github.com/DataDog/chaos-controller/watchers" @@ -157,36 +158,49 @@ func main() { } // initialize the cloud provider manager which will handle ip ranges files updates - cloudProviderManager, err := cloudservice.New(logger, cfg.Controller.CloudProviders) + cloudProviderManager, err := cloudservice.New(logger, cfg.Controller.CloudProviders, nil) if err != nil { logger.Fatalw("error initializing CloudProviderManager", "error", err) } cloudProviderManager.StartPeriodicPull() + chaosPodService, err := services.NewChaosPodService(services.ChaosPodServiceConfig{ + Client: mgr.GetClient(), + Log: logger, + ChaosNamespace: cfg.Injector.ChaosNamespace, + TargetSelector: targetSelector, + Injector: services.ChaosPodServiceInjectorConfig{ + ServiceAccount: cfg.Injector.ServiceAccount, + Image: cfg.Injector.Image, + Annotations: cfg.Injector.Annotations, + Labels: cfg.Injector.Labels, + NetworkDisruptionAllowedHosts: cfg.Injector.NetworkDisruption.AllowedHosts, + DNSDisruptionDNSServer: cfg.Injector.DNSDisruption.DNSServer, + DNSDisruptionKubeDNS: cfg.Injector.DNSDisruption.KubeDNS, + ImagePullSecrets: cfg.Injector.ImagePullSecrets, + }, + ImagePullSecrets: cfg.Injector.ImagePullSecrets, + MetricsSink: metricsSink, + CloudServicesProvidersManager: cloudProviderManager, + }) + + if err != nil { + logger.Fatalw("error initializing ChaosPodService", "error", err) + } + // create disruption reconciler disruptionReconciler := &controllers.DisruptionReconciler{ - Client: mgr.GetClient(), - BaseLog: logger, - Scheme: mgr.GetScheme(), - Recorder: broadcaster.NewRecorder(mgr.GetScheme(), corev1.EventSource{Component: chaosv1beta1.SourceDisruptionComponent}), - MetricsSink: metricsSink, - TracerSink: tracerSink, - TargetSelector: targetSelector, - InjectorAnnotations: cfg.Injector.Annotations, - InjectorLabels: cfg.Injector.Labels, - InjectorServiceAccount: cfg.Injector.ServiceAccount, - InjectorImage: cfg.Injector.Image, - ChaosNamespace: cfg.Injector.ChaosNamespace, - InjectorDNSDisruptionDNSServer: cfg.Injector.DNSDisruption.DNSServer, - InjectorDNSDisruptionKubeDNS: cfg.Injector.DNSDisruption.KubeDNS, - InjectorNetworkDisruptionAllowedHosts: cfg.Injector.NetworkDisruption.AllowedHosts, - ImagePullSecrets: cfg.Injector.ImagePullSecrets, - ExpiredDisruptionGCDelay: gcPtr, - CacheContextStore: make(map[string]controllers.CtxTuple), - Reader: mgr.GetAPIReader(), - EnableObserver: cfg.Controller.EnableObserver, - CloudServicesProvidersManager: cloudProviderManager, + Client: mgr.GetClient(), + BaseLog: logger, + Scheme: mgr.GetScheme(), + Recorder: broadcaster.NewRecorder(mgr.GetScheme(), corev1.EventSource{Component: chaosv1beta1.SourceDisruptionComponent}), + MetricsSink: metricsSink, + TracerSink: tracerSink, + TargetSelector: targetSelector, + ExpiredDisruptionGCDelay: gcPtr, + CacheContextStore: make(map[string]controllers.CtxTuple), + ChaosPodService: chaosPodService, } informerClient := 
kubernetes.NewForConfigOrDie(ctrl.GetConfigOrDie()) @@ -197,8 +211,6 @@ func main() { logger.Fatalw("unable to create controller", "controller", chaosv1beta1.DisruptionKind, "error", err) } - disruptionReconciler.Controller = cont - watchersFactoryConfig := watchers.FactoryConfig{ Log: logger, MetricSink: metricsSink, @@ -207,7 +219,7 @@ func main() { ChaosNamespace: cfg.Injector.ChaosNamespace, } watcherFactory := watchers.NewWatcherFactory(watchersFactoryConfig) - disruptionReconciler.DisruptionsWatchersManager = watchers.NewDisruptionsWatchersManager(cont, watcherFactory, disruptionReconciler.Reader, logger) + disruptionReconciler.DisruptionsWatchersManager = watchers.NewDisruptionsWatchersManager(cont, watcherFactory, mgr.GetAPIReader(), logger) ctx, cancel := context.WithCancel(context.Background()) @@ -233,7 +245,7 @@ func main() { stopCh := make(chan struct{}) kubeInformerFactory.Start(stopCh) - go disruptionReconciler.ReportMetrics() + go disruptionReconciler.ReportMetrics(ctx) if cfg.Controller.DisruptionRolloutEnabled { // create deployment and statefulset informers diff --git a/mocks/client.go b/mocks/client.go new file mode 100644 index 000000000..0fc621123 --- /dev/null +++ b/mocks/client.go @@ -0,0 +1,630 @@ +// Code generated by mockery. DO NOT EDIT. + +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2023 Datadog, Inc. +package mocks + +import ( + context "context" + + client "sigs.k8s.io/controller-runtime/pkg/client" + + meta "k8s.io/apimachinery/pkg/api/meta" + + mock "github.com/stretchr/testify/mock" + + runtime "k8s.io/apimachinery/pkg/runtime" + + types "k8s.io/apimachinery/pkg/types" +) + +// K8SClientMock is an autogenerated mock type for the Client type +type K8SClientMock struct { + mock.Mock +} + +type K8SClientMock_Expecter struct { + mock *mock.Mock +} + +func (_m *K8SClientMock) EXPECT() *K8SClientMock_Expecter { + return &K8SClientMock_Expecter{mock: &_m.Mock} +} + +// Create provides a mock function with given fields: ctx, obj, opts +func (_m *K8SClientMock) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.CreateOption) error); ok { + r0 = rf(ctx, obj, opts...) 
+ } else { + r0 = ret.Error(0) + } + + return r0 +} + +// K8SClientMock_Create_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Create' +type K8SClientMock_Create_Call struct { + *mock.Call +} + +// Create is a helper method to define mock.On call +// - ctx context.Context +// - obj client.Object +// - opts ...client.CreateOption +func (_e *K8SClientMock_Expecter) Create(ctx interface{}, obj interface{}, opts ...interface{}) *K8SClientMock_Create_Call { + return &K8SClientMock_Create_Call{Call: _e.mock.On("Create", + append([]interface{}{ctx, obj}, opts...)...)} +} + +func (_c *K8SClientMock_Create_Call) Run(run func(ctx context.Context, obj client.Object, opts ...client.CreateOption)) *K8SClientMock_Create_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.CreateOption, len(args)-2) + for i, a := range args[2:] { + if a != nil { + variadicArgs[i] = a.(client.CreateOption) + } + } + run(args[0].(context.Context), args[1].(client.Object), variadicArgs...) + }) + return _c +} + +func (_c *K8SClientMock_Create_Call) Return(_a0 error) *K8SClientMock_Create_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_Create_Call) RunAndReturn(run func(context.Context, client.Object, ...client.CreateOption) error) *K8SClientMock_Create_Call { + _c.Call.Return(run) + return _c +} + +// Delete provides a mock function with given fields: ctx, obj, opts +func (_m *K8SClientMock) Delete(ctx context.Context, obj client.Object, opts ...client.DeleteOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.DeleteOption) error); ok { + r0 = rf(ctx, obj, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// K8SClientMock_Delete_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Delete' +type K8SClientMock_Delete_Call struct { + *mock.Call +} + +// Delete is a helper method to define mock.On call +// - ctx context.Context +// - obj client.Object +// - opts ...client.DeleteOption +func (_e *K8SClientMock_Expecter) Delete(ctx interface{}, obj interface{}, opts ...interface{}) *K8SClientMock_Delete_Call { + return &K8SClientMock_Delete_Call{Call: _e.mock.On("Delete", + append([]interface{}{ctx, obj}, opts...)...)} +} + +func (_c *K8SClientMock_Delete_Call) Run(run func(ctx context.Context, obj client.Object, opts ...client.DeleteOption)) *K8SClientMock_Delete_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.DeleteOption, len(args)-2) + for i, a := range args[2:] { + if a != nil { + variadicArgs[i] = a.(client.DeleteOption) + } + } + run(args[0].(context.Context), args[1].(client.Object), variadicArgs...) 
+ }) + return _c +} + +func (_c *K8SClientMock_Delete_Call) Return(_a0 error) *K8SClientMock_Delete_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_Delete_Call) RunAndReturn(run func(context.Context, client.Object, ...client.DeleteOption) error) *K8SClientMock_Delete_Call { + _c.Call.Return(run) + return _c +} + +// DeleteAllOf provides a mock function with given fields: ctx, obj, opts +func (_m *K8SClientMock) DeleteAllOf(ctx context.Context, obj client.Object, opts ...client.DeleteAllOfOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.DeleteAllOfOption) error); ok { + r0 = rf(ctx, obj, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// K8SClientMock_DeleteAllOf_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteAllOf' +type K8SClientMock_DeleteAllOf_Call struct { + *mock.Call +} + +// DeleteAllOf is a helper method to define mock.On call +// - ctx context.Context +// - obj client.Object +// - opts ...client.DeleteAllOfOption +func (_e *K8SClientMock_Expecter) DeleteAllOf(ctx interface{}, obj interface{}, opts ...interface{}) *K8SClientMock_DeleteAllOf_Call { + return &K8SClientMock_DeleteAllOf_Call{Call: _e.mock.On("DeleteAllOf", + append([]interface{}{ctx, obj}, opts...)...)} +} + +func (_c *K8SClientMock_DeleteAllOf_Call) Run(run func(ctx context.Context, obj client.Object, opts ...client.DeleteAllOfOption)) *K8SClientMock_DeleteAllOf_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.DeleteAllOfOption, len(args)-2) + for i, a := range args[2:] { + if a != nil { + variadicArgs[i] = a.(client.DeleteAllOfOption) + } + } + run(args[0].(context.Context), args[1].(client.Object), variadicArgs...) + }) + return _c +} + +func (_c *K8SClientMock_DeleteAllOf_Call) Return(_a0 error) *K8SClientMock_DeleteAllOf_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_DeleteAllOf_Call) RunAndReturn(run func(context.Context, client.Object, ...client.DeleteAllOfOption) error) *K8SClientMock_DeleteAllOf_Call { + _c.Call.Return(run) + return _c +} + +// Get provides a mock function with given fields: ctx, key, obj, opts +func (_m *K8SClientMock) Get(ctx context.Context, key types.NamespacedName, obj client.Object, opts ...client.GetOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, key, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, types.NamespacedName, client.Object, ...client.GetOption) error); ok { + r0 = rf(ctx, key, obj, opts...) 
+ } else { + r0 = ret.Error(0) + } + + return r0 +} + +// K8SClientMock_Get_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Get' +type K8SClientMock_Get_Call struct { + *mock.Call +} + +// Get is a helper method to define mock.On call +// - ctx context.Context +// - key types.NamespacedName +// - obj client.Object +// - opts ...client.GetOption +func (_e *K8SClientMock_Expecter) Get(ctx interface{}, key interface{}, obj interface{}, opts ...interface{}) *K8SClientMock_Get_Call { + return &K8SClientMock_Get_Call{Call: _e.mock.On("Get", + append([]interface{}{ctx, key, obj}, opts...)...)} +} + +func (_c *K8SClientMock_Get_Call) Run(run func(ctx context.Context, key types.NamespacedName, obj client.Object, opts ...client.GetOption)) *K8SClientMock_Get_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.GetOption, len(args)-3) + for i, a := range args[3:] { + if a != nil { + variadicArgs[i] = a.(client.GetOption) + } + } + run(args[0].(context.Context), args[1].(types.NamespacedName), args[2].(client.Object), variadicArgs...) + }) + return _c +} + +func (_c *K8SClientMock_Get_Call) Return(_a0 error) *K8SClientMock_Get_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_Get_Call) RunAndReturn(run func(context.Context, types.NamespacedName, client.Object, ...client.GetOption) error) *K8SClientMock_Get_Call { + _c.Call.Return(run) + return _c +} + +// List provides a mock function with given fields: ctx, list, opts +func (_m *K8SClientMock) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, list) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.ObjectList, ...client.ListOption) error); ok { + r0 = rf(ctx, list, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// K8SClientMock_List_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'List' +type K8SClientMock_List_Call struct { + *mock.Call +} + +// List is a helper method to define mock.On call +// - ctx context.Context +// - list client.ObjectList +// - opts ...client.ListOption +func (_e *K8SClientMock_Expecter) List(ctx interface{}, list interface{}, opts ...interface{}) *K8SClientMock_List_Call { + return &K8SClientMock_List_Call{Call: _e.mock.On("List", + append([]interface{}{ctx, list}, opts...)...)} +} + +func (_c *K8SClientMock_List_Call) Run(run func(ctx context.Context, list client.ObjectList, opts ...client.ListOption)) *K8SClientMock_List_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.ListOption, len(args)-2) + for i, a := range args[2:] { + if a != nil { + variadicArgs[i] = a.(client.ListOption) + } + } + run(args[0].(context.Context), args[1].(client.ObjectList), variadicArgs...) 
+ }) + return _c +} + +func (_c *K8SClientMock_List_Call) Return(_a0 error) *K8SClientMock_List_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_List_Call) RunAndReturn(run func(context.Context, client.ObjectList, ...client.ListOption) error) *K8SClientMock_List_Call { + _c.Call.Return(run) + return _c +} + +// Patch provides a mock function with given fields: ctx, obj, patch, opts +func (_m *K8SClientMock) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj, patch) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, client.Patch, ...client.PatchOption) error); ok { + r0 = rf(ctx, obj, patch, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// K8SClientMock_Patch_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Patch' +type K8SClientMock_Patch_Call struct { + *mock.Call +} + +// Patch is a helper method to define mock.On call +// - ctx context.Context +// - obj client.Object +// - patch client.Patch +// - opts ...client.PatchOption +func (_e *K8SClientMock_Expecter) Patch(ctx interface{}, obj interface{}, patch interface{}, opts ...interface{}) *K8SClientMock_Patch_Call { + return &K8SClientMock_Patch_Call{Call: _e.mock.On("Patch", + append([]interface{}{ctx, obj, patch}, opts...)...)} +} + +func (_c *K8SClientMock_Patch_Call) Run(run func(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption)) *K8SClientMock_Patch_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.PatchOption, len(args)-3) + for i, a := range args[3:] { + if a != nil { + variadicArgs[i] = a.(client.PatchOption) + } + } + run(args[0].(context.Context), args[1].(client.Object), args[2].(client.Patch), variadicArgs...) 
+ }) + return _c +} + +func (_c *K8SClientMock_Patch_Call) Return(_a0 error) *K8SClientMock_Patch_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_Patch_Call) RunAndReturn(run func(context.Context, client.Object, client.Patch, ...client.PatchOption) error) *K8SClientMock_Patch_Call { + _c.Call.Return(run) + return _c +} + +// RESTMapper provides a mock function with given fields: +func (_m *K8SClientMock) RESTMapper() meta.RESTMapper { + ret := _m.Called() + + var r0 meta.RESTMapper + if rf, ok := ret.Get(0).(func() meta.RESTMapper); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(meta.RESTMapper) + } + } + + return r0 +} + +// K8SClientMock_RESTMapper_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RESTMapper' +type K8SClientMock_RESTMapper_Call struct { + *mock.Call +} + +// RESTMapper is a helper method to define mock.On call +func (_e *K8SClientMock_Expecter) RESTMapper() *K8SClientMock_RESTMapper_Call { + return &K8SClientMock_RESTMapper_Call{Call: _e.mock.On("RESTMapper")} +} + +func (_c *K8SClientMock_RESTMapper_Call) Run(run func()) *K8SClientMock_RESTMapper_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *K8SClientMock_RESTMapper_Call) Return(_a0 meta.RESTMapper) *K8SClientMock_RESTMapper_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_RESTMapper_Call) RunAndReturn(run func() meta.RESTMapper) *K8SClientMock_RESTMapper_Call { + _c.Call.Return(run) + return _c +} + +// Scheme provides a mock function with given fields: +func (_m *K8SClientMock) Scheme() *runtime.Scheme { + ret := _m.Called() + + var r0 *runtime.Scheme + if rf, ok := ret.Get(0).(func() *runtime.Scheme); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*runtime.Scheme) + } + } + + return r0 +} + +// K8SClientMock_Scheme_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Scheme' +type K8SClientMock_Scheme_Call struct { + *mock.Call +} + +// Scheme is a helper method to define mock.On call +func (_e *K8SClientMock_Expecter) Scheme() *K8SClientMock_Scheme_Call { + return &K8SClientMock_Scheme_Call{Call: _e.mock.On("Scheme")} +} + +func (_c *K8SClientMock_Scheme_Call) Run(run func()) *K8SClientMock_Scheme_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *K8SClientMock_Scheme_Call) Return(_a0 *runtime.Scheme) *K8SClientMock_Scheme_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_Scheme_Call) RunAndReturn(run func() *runtime.Scheme) *K8SClientMock_Scheme_Call { + _c.Call.Return(run) + return _c +} + +// Status provides a mock function with given fields: +func (_m *K8SClientMock) Status() client.SubResourceWriter { + ret := _m.Called() + + var r0 client.SubResourceWriter + if rf, ok := ret.Get(0).(func() client.SubResourceWriter); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(client.SubResourceWriter) + } + } + + return r0 +} + +// K8SClientMock_Status_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Status' +type K8SClientMock_Status_Call struct { + *mock.Call +} + +// Status is a helper method to define mock.On call +func (_e *K8SClientMock_Expecter) Status() *K8SClientMock_Status_Call { + return &K8SClientMock_Status_Call{Call: _e.mock.On("Status")} +} + +func (_c *K8SClientMock_Status_Call) Run(run func()) *K8SClientMock_Status_Call { + _c.Call.Run(func(args mock.Arguments) { + 
run() + }) + return _c +} + +func (_c *K8SClientMock_Status_Call) Return(_a0 client.SubResourceWriter) *K8SClientMock_Status_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_Status_Call) RunAndReturn(run func() client.SubResourceWriter) *K8SClientMock_Status_Call { + _c.Call.Return(run) + return _c +} + +// SubResource provides a mock function with given fields: subResource +func (_m *K8SClientMock) SubResource(subResource string) client.SubResourceClient { + ret := _m.Called(subResource) + + var r0 client.SubResourceClient + if rf, ok := ret.Get(0).(func(string) client.SubResourceClient); ok { + r0 = rf(subResource) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(client.SubResourceClient) + } + } + + return r0 +} + +// K8SClientMock_SubResource_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SubResource' +type K8SClientMock_SubResource_Call struct { + *mock.Call +} + +// SubResource is a helper method to define mock.On call +// - subResource string +func (_e *K8SClientMock_Expecter) SubResource(subResource interface{}) *K8SClientMock_SubResource_Call { + return &K8SClientMock_SubResource_Call{Call: _e.mock.On("SubResource", subResource)} +} + +func (_c *K8SClientMock_SubResource_Call) Run(run func(subResource string)) *K8SClientMock_SubResource_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(string)) + }) + return _c +} + +func (_c *K8SClientMock_SubResource_Call) Return(_a0 client.SubResourceClient) *K8SClientMock_SubResource_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_SubResource_Call) RunAndReturn(run func(string) client.SubResourceClient) *K8SClientMock_SubResource_Call { + _c.Call.Return(run) + return _c +} + +// Update provides a mock function with given fields: ctx, obj, opts +func (_m *K8SClientMock) Update(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, obj) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, client.Object, ...client.UpdateOption) error); ok { + r0 = rf(ctx, obj, opts...) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// K8SClientMock_Update_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Update' +type K8SClientMock_Update_Call struct { + *mock.Call +} + +// Update is a helper method to define mock.On call +// - ctx context.Context +// - obj client.Object +// - opts ...client.UpdateOption +func (_e *K8SClientMock_Expecter) Update(ctx interface{}, obj interface{}, opts ...interface{}) *K8SClientMock_Update_Call { + return &K8SClientMock_Update_Call{Call: _e.mock.On("Update", + append([]interface{}{ctx, obj}, opts...)...)} +} + +func (_c *K8SClientMock_Update_Call) Run(run func(ctx context.Context, obj client.Object, opts ...client.UpdateOption)) *K8SClientMock_Update_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.UpdateOption, len(args)-2) + for i, a := range args[2:] { + if a != nil { + variadicArgs[i] = a.(client.UpdateOption) + } + } + run(args[0].(context.Context), args[1].(client.Object), variadicArgs...) 
+ }) + return _c +} + +func (_c *K8SClientMock_Update_Call) Return(_a0 error) *K8SClientMock_Update_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *K8SClientMock_Update_Call) RunAndReturn(run func(context.Context, client.Object, ...client.UpdateOption) error) *K8SClientMock_Update_Call { + _c.Call.Return(run) + return _c +} + +type mockConstructorTestingTNewK8SClientMock interface { + mock.TestingT + Cleanup(func()) +} + +// NewK8SClientMock creates a new instance of K8SClientMock. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewK8SClientMock(t mockConstructorTestingTNewK8SClientMock) *K8SClientMock { + mock := &K8SClientMock{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/mocks/round_tripper.go b/mocks/round_tripper.go new file mode 100644 index 000000000..4efdbe5f1 --- /dev/null +++ b/mocks/round_tripper.go @@ -0,0 +1,95 @@ +// Code generated by mockery. DO NOT EDIT. + +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2023 Datadog, Inc. +package mocks + +import ( + http "net/http" + + mock "github.com/stretchr/testify/mock" +) + +// RoundTripperMock is an autogenerated mock type for the RoundTripper type +type RoundTripperMock struct { + mock.Mock +} + +type RoundTripperMock_Expecter struct { + mock *mock.Mock +} + +func (_m *RoundTripperMock) EXPECT() *RoundTripperMock_Expecter { + return &RoundTripperMock_Expecter{mock: &_m.Mock} +} + +// RoundTrip provides a mock function with given fields: _a0 +func (_m *RoundTripperMock) RoundTrip(_a0 *http.Request) (*http.Response, error) { + ret := _m.Called(_a0) + + var r0 *http.Response + var r1 error + if rf, ok := ret.Get(0).(func(*http.Request) (*http.Response, error)); ok { + return rf(_a0) + } + if rf, ok := ret.Get(0).(func(*http.Request) *http.Response); ok { + r0 = rf(_a0) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*http.Response) + } + } + + if rf, ok := ret.Get(1).(func(*http.Request) error); ok { + r1 = rf(_a0) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// RoundTripperMock_RoundTrip_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RoundTrip' +type RoundTripperMock_RoundTrip_Call struct { + *mock.Call +} + +// RoundTrip is a helper method to define mock.On call +// - _a0 *http.Request +func (_e *RoundTripperMock_Expecter) RoundTrip(_a0 interface{}) *RoundTripperMock_RoundTrip_Call { + return &RoundTripperMock_RoundTrip_Call{Call: _e.mock.On("RoundTrip", _a0)} +} + +func (_c *RoundTripperMock_RoundTrip_Call) Run(run func(_a0 *http.Request)) *RoundTripperMock_RoundTrip_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*http.Request)) + }) + return _c +} + +func (_c *RoundTripperMock_RoundTrip_Call) Return(_a0 *http.Response, _a1 error) *RoundTripperMock_RoundTrip_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *RoundTripperMock_RoundTrip_Call) RunAndReturn(run func(*http.Request) (*http.Response, error)) *RoundTripperMock_RoundTrip_Call { + _c.Call.Return(run) + return _c +} + +type mockConstructorTestingTNewRoundTripperMock interface { + mock.TestingT + Cleanup(func()) +} + +// NewRoundTripperMock creates a new instance of RoundTripperMock. 
It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
+func NewRoundTripperMock(t mockConstructorTestingTNewRoundTripperMock) *RoundTripperMock {
+	mock := &RoundTripperMock{}
+	mock.Mock.Test(t)
+
+	t.Cleanup(func() { mock.AssertExpectations(t) })
+
+	return mock
+}
diff --git a/services/chaospod.go b/services/chaospod.go
new file mode 100644
index 000000000..07d656e2a
--- /dev/null
+++ b/services/chaospod.go
@@ -0,0 +1,642 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2023 Datadog, Inc.
+
+package services
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+	"strings"
+	"time"
+
+	chaosapi "github.com/DataDog/chaos-controller/api"
+	chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1"
+	"github.com/DataDog/chaos-controller/cloudservice"
+	"github.com/DataDog/chaos-controller/env"
+	"github.com/DataDog/chaos-controller/o11y/metrics"
+	"github.com/DataDog/chaos-controller/targetselector"
+	chaostypes "github.com/DataDog/chaos-controller/types"
+	"github.com/cenkalti/backoff"
+	"go.uber.org/zap"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+)
+
+// ChaosPodService is an interface that defines methods for managing the chaos pods of a disruption.
+type ChaosPodService interface {
+	// GetChaosPodsOfDisruption retrieves the list of chaos pods of a disruption matching the given labels.
+	GetChaosPodsOfDisruption(ctx context.Context, instance *chaosv1beta1.Disruption, ls labels.Set) ([]corev1.Pod, error)
+
+	// HandleChaosPodTermination handles the termination of a chaos pod during a disruption event.
+	HandleChaosPodTermination(ctx context.Context, disruption *chaosv1beta1.Disruption, pod *corev1.Pod) (bool, error)
+
+	// DeletePod deletes a pod from the Kubernetes cluster.
+	DeletePod(ctx context.Context, pod corev1.Pod) bool
+
+	// GenerateChaosPodOfDisruption generates a pod for the disruption.
+	GenerateChaosPodOfDisruption(disruption *chaosv1beta1.Disruption, targetName, targetNodeName string, args []string, kind chaostypes.DisruptionKindName) corev1.Pod
+
+	// GenerateChaosPodsOfDisruption generates a list of chaos pods for the disruption.
+	GenerateChaosPodsOfDisruption(instance *chaosv1beta1.Disruption, targetName, targetNodeName string, targetContainers map[string]string, targetPodIP string) ([]corev1.Pod, error)
+
+	// GetPodInjectorArgs retrieves the arguments of the injector container of a chaos pod.
+	GetPodInjectorArgs(pod corev1.Pod) []string
+
+	// CreatePod creates a pod in the Kubernetes cluster.
+	CreatePod(ctx context.Context, pod *corev1.Pod) error
+
+	// WaitForPodCreation waits for a pod to be created in the Kubernetes cluster.
+	WaitForPodCreation(ctx context.Context, pod corev1.Pod) error
+
+	// HandleOrphanedChaosPods handles orphaned chaos pods based on a controller request.
+	HandleOrphanedChaosPods(ctx context.Context, req ctrl.Request) error
+}
+
+// ChaosPodServiceInjectorConfig contains configuration options for the injector.
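+//
+// As an illustrative sketch only (the values below are hypothetical, not
+// something this patch prescribes), a caller could build this configuration
+// like so:
+//
+//	injector := ChaosPodServiceInjectorConfig{
+//		ServiceAccount: "chaos-injector",
+//		Image:          "chaos-injector:latest",
+//		Annotations:    map[string]string{"team": "chaos-engineering"},
+//	}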
+type ChaosPodServiceInjectorConfig struct {
+	ServiceAccount                string            // Service account to be used by the injector.
+	Image                         string            // Image to be used for the injector.
+	Annotations, Labels           map[string]string // Annotations and labels to be applied to injected pods.
+	NetworkDisruptionAllowedHosts []string          // List of hosts allowed during network disruption.
+	DNSDisruptionDNSServer        string            // DNS server to be used for DNS disruption.
+	DNSDisruptionKubeDNS          string            // KubeDNS server to be used for DNS disruption.
+	ImagePullSecrets              string            // Image pull secrets for the injector.
+}
+
+// ChaosPodServiceConfig contains configuration options for the chaosPodService.
+type ChaosPodServiceConfig struct {
+	Client                        client.Client                              // Kubernetes client for interacting with the API server.
+	Log                           *zap.SugaredLogger                         // Logger for logging.
+	ChaosNamespace                string                                     // Namespace where chaos-related resources are located.
+	TargetSelector                targetselector.TargetSelector              // Target selector for selecting target pods.
+	Injector                      ChaosPodServiceInjectorConfig              // Configuration options for the injector.
+	ImagePullSecrets              string                                     // Image pull secrets for the chaosPodService.
+	MetricsSink                   metrics.Sink                               // Sink for exporting metrics.
+	CloudServicesProvidersManager cloudservice.CloudServicesProvidersManager // Manager for cloud service providers.
+}
+
+type chaosPodService struct {
+	config ChaosPodServiceConfig
+}
+
+type ChaosPodAllowedErrors map[string]struct{}
+
+func (c ChaosPodAllowedErrors) isNotAllowed(errorMsg string) bool {
+	_, allowed := chaosPodAllowedErrors[errorMsg]
+
+	return !allowed
+}
+
+var chaosPodAllowedErrors = ChaosPodAllowedErrors{
+	"pod is not running": {},
+	"node is not ready":  {},
+}
+
+// NewChaosPodService creates a new chaos pod service instance with the provided configuration.
+func NewChaosPodService(config ChaosPodServiceConfig) (ChaosPodService, error) {
+	if config.Client == nil {
+		return nil, fmt.Errorf("you must provide a non nil Kubernetes client")
+	}
+
+	return &chaosPodService{
+		config: config,
+	}, nil
+}
+
+// CreatePod creates a pod in the Kubernetes cluster.
+func (m *chaosPodService) CreatePod(ctx context.Context, pod *corev1.Pod) error {
+	return m.config.Client.Create(ctx, pod)
+}
+
+// GetChaosPodsOfDisruption retrieves a list of chaos-related pods affected by a disruption event,
+// filtered by the provided labels.
+func (m *chaosPodService) GetChaosPodsOfDisruption(ctx context.Context, instance *chaosv1beta1.Disruption, ls labels.Set) ([]corev1.Pod, error) {
+	return chaosv1beta1.GetChaosPods(ctx, m.config.Log, m.config.ChaosNamespace, m.config.Client, instance, ls)
+}
+
+// HandleChaosPodTermination handles the termination of a chaos-related pod during a disruption event.
+func (m *chaosPodService) HandleChaosPodTermination(ctx context.Context, disruption *chaosv1beta1.Disruption, chaosPod *corev1.Pod) (bool, error) {
+	// Ignore chaos pods that no longer have the finalizer
+	if !controllerutil.ContainsFinalizer(chaosPod, chaostypes.ChaosPodFinalizer) {
+		return true, nil
+	}
+
+	// Ignore chaos pods that are not being deleted
+	if chaosPod.DeletionTimestamp.IsZero() {
+		return false, nil
+	}
+
+	target := chaosPod.Labels[chaostypes.TargetLabel]
+
+	// Check if the target of the disruption is healthy (running and ready).
+	if err := m.config.TargetSelector.TargetIsHealthy(target, m.config.Client, disruption); err != nil {
+		// Return the error unless we have a specific reason to ignore it.
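+		// (The allowed errors cover expected termination-time conditions: a
+		// lowercased "pod is not running" or "node is not ready" message from
+		// the target selector means there is nothing healthy left to clean up.)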
+		if !apierrors.IsNotFound(err) && chaosPodAllowedErrors.isNotAllowed(strings.ToLower(err.Error())) {
+			return false, err
+		}
+
+		// If the target is not in a good shape, proceed with the cleanup phase.
+		m.config.Log.Infow("target is not likely to be cleaned (either it does not exist anymore or it is not ready), the injector will TRY to clean it but will ignore any failures", "target", target)
+
+		// Remove the finalizer for the chaos pod since cleanup won't be fully reliable.
+		if err := m.removeFinalizerForChaosPod(ctx, chaosPod); err != nil {
+			return false, err
+		}
+
+		return true, nil
+	}
+
+	// Some chaos pods are always safe to remove: it is usually hard to tell whether they have
+	// succeeded or not, but they cannot leave side effects behind, so we always remove the finalizer.
+	if chaosv1beta1.DisruptionHasNoSideEffects(chaosPod.Labels[chaostypes.DisruptionKindLabel]) {
+		if err := m.removeFinalizerForChaosPod(ctx, chaosPod); err != nil {
+			return false, err
+		}
+
+		return true, nil
+	}
+
+	// If the finalizer cannot be removed yet, return without removing it.
+	if m.isFinalizerNotRemovableForChaosPod(chaosPod) {
+		return false, nil
+	}
+
+	// Remove the finalizer for the chaos pod since cleanup was successful.
+	if err := m.removeFinalizerForChaosPod(ctx, chaosPod); err != nil {
+		return false, err
+	}
+
+	return true, nil
+}
+
+// DeletePod attempts to delete the specified pod from the Kubernetes cluster.
+// Returns true if the deletion was successful, otherwise false.
+func (m *chaosPodService) DeletePod(ctx context.Context, pod corev1.Pod) bool {
+	if err := m.deletePod(ctx, pod); err != nil {
+		m.config.Log.Errorw("error terminating chaos pod", "error", err, "chaosPod", pod.Name)
+
+		return false
+	}
+
+	return true
+}
+
+// GenerateChaosPodsOfDisruption generates a list of chaos pods for the given disruption instance,
+// target information, and other configuration parameters.
+func (m *chaosPodService) GenerateChaosPodsOfDisruption(instance *chaosv1beta1.Disruption, targetName string, targetNodeName string, targetContainers map[string]string, targetPodIP string) ([]corev1.Pod, error) {
+	pods := []corev1.Pod{}
+
+	// generate a chaos pod for each possible disruption kind
+	for _, kind := range chaostypes.DisruptionKindNames {
+		subspec := instance.Spec.DisruptionKindPicker(kind)
+		if reflect.ValueOf(subspec).IsNil() {
+			continue
+		}
+
+		pulseActiveDuration, pulseDormantDuration, pulseInitialDelay := time.Duration(0), time.Duration(0), time.Duration(0)
+		if instance.Spec.Pulse != nil {
+			pulseInitialDelay = instance.Spec.Pulse.InitialDelay.Duration()
+			pulseActiveDuration = instance.Spec.Pulse.ActiveDuration.Duration()
+			pulseDormantDuration = instance.Spec.Pulse.DormantDuration.Duration()
+		}
+
+		notInjectedBefore := instance.TimeToInject()
+
+		allowedHosts := m.config.Injector.NetworkDisruptionAllowedHosts
+
+		// get the IP ranges of cloud provider services
+		if instance.Spec.Network != nil {
+			if instance.Spec.Network.Cloud != nil {
+				hosts, err := chaosv1beta1.TransformCloudSpecToHostsSpec(m.config.CloudServicesProvidersManager, instance.Spec.Network.Cloud)
+				if err != nil {
+					return nil, err
+				}
+
+				instance.Spec.Network.Hosts = append(instance.Spec.Network.Hosts, hosts...)
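+				// For example, a GCP service entry in the cloud spec expands here
+				// to host specs such as "10.0.0.0-10.10.10.10;0;TCP;ingress;open"
+				// (see the nominal cloud spec case in chaospod_test.go).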
+			}
+
+			// remove default allowed hosts if disabled
+			if instance.Spec.Network.DisableDefaultAllowedHosts {
+				allowedHosts = make([]string, 0)
+			}
+		}
+
+		xargs := chaosapi.DisruptionArgs{
+			Level:                instance.Spec.Level,
+			Kind:                 kind,
+			TargetContainers:     targetContainers,
+			TargetName:           targetName,
+			TargetNodeName:       targetNodeName,
+			TargetPodIP:          targetPodIP,
+			DryRun:               instance.Spec.DryRun,
+			DisruptionName:       instance.Name,
+			DisruptionNamespace:  instance.Namespace,
+			OnInit:               instance.Spec.OnInit,
+			PulseInitialDelay:    pulseInitialDelay,
+			PulseActiveDuration:  pulseActiveDuration,
+			PulseDormantDuration: pulseDormantDuration,
+			NotInjectedBefore:    notInjectedBefore,
+			MetricsSink:          m.config.MetricsSink.GetSinkName(),
+			AllowedHosts:         allowedHosts,
+			DNSServer:            m.config.Injector.DNSDisruptionDNSServer,
+			KubeDNS:              m.config.Injector.DNSDisruptionKubeDNS,
+			ChaosNamespace:       m.config.ChaosNamespace,
+		}
+
+		args := xargs.CreateCmdArgs(subspec.GenerateArgs())
+
+		pod := m.GenerateChaosPodOfDisruption(instance, targetName, targetNodeName, args, kind)
+
+		pods = append(pods, pod)
+	}
+
+	return pods, nil
+}
+
+// GenerateChaosPodOfDisruption generates a chaos pod for a specific disruption.
+func (m *chaosPodService) GenerateChaosPodOfDisruption(disruption *chaosv1beta1.Disruption, targetName, targetNodeName string, args []string, kind chaostypes.DisruptionKindName) (chaosPod corev1.Pod) {
+	// volume host path type definitions
+	hostPathDirectory := corev1.HostPathDirectory
+	hostPathFile := corev1.HostPathFile
+
+	// The default TerminationGracePeriodSeconds is 30s. This can be too low for a chaos pod to finish cleaning up. Once it passes,
+	// the signal sent to the pod becomes SIGKILL, which interrupts any in-progress cleanup. Doubling it to 1 minute in the pod spec
+	// itself ensures that, whether a chaos pod is deleted directly or by deleting a disruption, it has time to finish cleaning up after itself.
+	terminationGracePeriod := int64(60) // 60 seconds
+
+	// Chaos pods clean themselves up automatically when their duration expires, so we set activeDeadlineSeconds to ten seconds after that
+	// to give them time to clean up
+	activeDeadlineSeconds := int64(disruption.RemainingDuration().Seconds()) + 10
+
+	args = append(args,
+		"--deadline", time.Now().Add(disruption.RemainingDuration()).Format(time.RFC3339))
+
+	chaosPod = corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: fmt.Sprintf("chaos-%s-", disruption.Name),      // generate the pod name automatically with a prefix
+			Namespace:    m.config.ChaosNamespace,                        // chaos pods need to be in the same namespace as their service account to run
+			Annotations:  m.config.Injector.Annotations,                  // add extra annotations passed to the controller
+			Labels:       m.generateLabels(disruption, targetName, kind), // add default and extra pod labels passed to the controller
+		},
+		Spec: m.generateChaosPodSpec(targetNodeName, terminationGracePeriod, activeDeadlineSeconds, args, hostPathDirectory, hostPathFile),
+	}
+
+	// add finalizer to the pod, so it is not deleted before we can control its exit status
+	controllerutil.AddFinalizer(&chaosPod, chaostypes.ChaosPodFinalizer)
+
+	return chaosPod
+}
+
+// GetPodInjectorArgs retrieves the arguments used by the "injector" container in a chaos pod.
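+// For a pod built by GenerateChaosPodOfDisruption, this is the full injection
+// command line, ending with the "--deadline" flag computed at generation time.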
+func (m *chaosPodService) GetPodInjectorArgs(chaosPod corev1.Pod) []string { + chaosPodArgs := []string{} + + if len(chaosPod.Spec.Containers) == 0 { + m.config.Log.Errorw("no containers found in chaos pod spec", "chaosPodSpec", chaosPod.Spec) + + return chaosPodArgs + } + + for _, container := range chaosPod.Spec.Containers { + if container.Name == "injector" { + chaosPodArgs = container.Args + } + } + + if len(chaosPodArgs) == 0 { + m.config.Log.Warnw("unable to find the args for this chaos pod", "chaosPodName", chaosPod.Name, "chaosPodSpec", chaosPod.Spec, "chaosPodContainerCount", len(chaosPod.Spec.Containers)) + } + + return chaosPodArgs +} + +// WaitForPodCreation waits for the given pod to be created +// it tries to get the pod using an exponential backoff with a max retry interval of 1 second and a max duration of 30 seconds +// if an unexpected error occurs (an error other than a "not found" error), the retry loop is stopped +func (m *chaosPodService) WaitForPodCreation(ctx context.Context, pod corev1.Pod) error { + expBackoff := backoff.NewExponentialBackOff() + expBackoff.MaxInterval = time.Second + expBackoff.MaxElapsedTime = 30 * time.Second + + return backoff.Retry(func() error { + err := m.config.Client.Get(ctx, types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, &pod) + if client.IgnoreNotFound(err) != nil { + return backoff.Permanent(err) + } + + return err + }, expBackoff) +} + +// HandleOrphanedChaosPods handles orphaned chaos pods related to a specific disruption. +func (m *chaosPodService) HandleOrphanedChaosPods(ctx context.Context, req ctrl.Request) error { + ls := make(map[string]string) + + // Set labels for filtering chaos pods related to the specified disruption. + ls[chaostypes.DisruptionNameLabel] = req.Name + ls[chaostypes.DisruptionNamespaceLabel] = req.Namespace + + // Retrieve chaos pods matching the specified labels. 
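+	// Note that a nil disruption is passed below on purpose: matching is done on
+	// the disruption name/namespace labels alone, so chaos pods whose disruption
+	// no longer exists are still returned.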
+ pods, err := m.GetChaosPodsOfDisruption(ctx, nil, ls) + if err != nil { + return err + } + + for _, pod := range pods { + m.handleMetricSinkError(m.config.MetricsSink.MetricOrphanFound([]string{"disruption:" + req.Name, "chaosPod:" + pod.Name, "namespace:" + req.Namespace})) + + target := pod.Labels[chaostypes.TargetLabel] + + var p corev1.Pod + + m.config.Log.Infow("checking if we can clean up orphaned chaos pod", "chaosPod", pod.Name, "target", target) + + // if target doesn't exist, we can try to clean up the chaos pod + if err = m.config.Client.Get(ctx, types.NamespacedName{Name: target, Namespace: req.Namespace}, &p); apierrors.IsNotFound(err) { + m.config.Log.Warnw("orphaned chaos pod detected, will attempt to delete", "chaosPod", pod.Name) + + if err = m.removeFinalizerForChaosPod(ctx, &pod); err != nil { + continue + } + + // if the chaos pod still exists after having its finalizer removed, delete it + if err = m.deletePod(ctx, pod); err != nil { + if chaosv1beta1.IsUpdateConflictError(err) { + m.config.Log.Infow("retryable error deleting orphaned chaos pod", "error", err, "chaosPod", pod.Name) + } else { + m.config.Log.Errorw("error deleting orphaned chaos pod", "error", err, "chaosPod", pod.Name) + } + } + } + } + + return nil +} + +func (m *chaosPodService) generateLabels(disruption *chaosv1beta1.Disruption, targetName string, kind chaostypes.DisruptionKindName) map[string]string { + podLabels := make(map[string]string) + + for k, v := range m.config.Injector.Labels { + podLabels[k] = v + } + + podLabels[chaostypes.TargetLabel] = targetName // target name label + podLabels[chaostypes.DisruptionKindLabel] = string(kind) // disruption kind label + podLabels[chaostypes.DisruptionNameLabel] = disruption.Name // disruption name label, used to determine ownership + podLabels[chaostypes.DisruptionNamespaceLabel] = disruption.Namespace // disruption namespace label, used to determine ownership + + return podLabels +} + +func (m *chaosPodService) generateChaosPodSpec(targetNodeName string, terminationGracePeriod int64, activeDeadlineSeconds int64, args []string, hostPathDirectory corev1.HostPathType, hostPathFile corev1.HostPathType) corev1.PodSpec { + podSpec := corev1.PodSpec{ + HostPID: true, // enable host pid + RestartPolicy: corev1.RestartPolicyNever, // do not restart the pod on fail or completion + NodeName: targetNodeName, // specify node name to schedule the pod + ServiceAccountName: m.config.Injector.ServiceAccount, // service account to use + TerminationGracePeriodSeconds: &terminationGracePeriod, + ActiveDeadlineSeconds: &activeDeadlineSeconds, + Containers: []corev1.Container{ + { + Name: "injector", // container name + Image: m.config.Injector.Image, // container image gathered from controller flags + ImagePullPolicy: corev1.PullIfNotPresent, // pull the image only when it is not present + Args: args, // pass disruption arguments + SecurityContext: &corev1.SecurityContext{ + Privileged: func() *bool { b := true; return &b }(), // enable privileged mode + }, + ReadinessProbe: &corev1.Probe{ // define readiness probe (file created by the injector when the injection is successful) + PeriodSeconds: 1, + FailureThreshold: 5, + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: []string{"test", "-f", "/tmp/readiness_probe"}, + }, + }, + }, + Resources: corev1.ResourceRequirements{ // set resources requests and limits to zero + Limits: corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewQuantity(0, resource.DecimalSI), + corev1.ResourceMemory: 
*resource.NewQuantity(0, resource.DecimalSI), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: *resource.NewQuantity(0, resource.DecimalSI), + corev1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI), + }, + }, + Env: []corev1.EnvVar{ // define environment variables + { + Name: env.InjectorTargetPodHostIP, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.hostIP", + }, + }, + }, + { + Name: env.InjectorChaosPodIP, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.podIP", + }, + }, + }, + { + Name: env.InjectorPodName, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: env.InjectorMountHost, + Value: "/mnt/host/", + }, + { + Name: env.InjectorMountProc, + Value: "/mnt/host/proc/", + }, + { + Name: env.InjectorMountSysrq, + Value: "/mnt/sysrq", + }, + { + Name: env.InjectorMountSysrqTrigger, + Value: "/mnt/sysrq-trigger", + }, + { + Name: env.InjectorMountCgroup, + Value: "/mnt/cgroup/", + }, + }, + VolumeMounts: []corev1.VolumeMount{ // define volume mounts required for disruptions to work + { + Name: "run", + MountPath: "/run", + }, + { + Name: "sysrq", + MountPath: "/mnt/sysrq", + }, + { + Name: "sysrq-trigger", + MountPath: "/mnt/sysrq-trigger", + }, + { + Name: "cgroup", + MountPath: "/mnt/cgroup", + }, + { + Name: "host", + MountPath: "/mnt/host", + ReadOnly: true, + }, + }, + }, + }, + Volumes: []corev1.Volume{ // declare volumes required for disruptions to work + { + Name: "run", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/run", + Type: &hostPathDirectory, + }, + }, + }, + { + Name: "proc", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/proc", + Type: &hostPathDirectory, + }, + }, + }, + { + Name: "sysrq", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/proc/sys/kernel/sysrq", + Type: &hostPathFile, + }, + }, + }, + { + Name: "sysrq-trigger", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/proc/sysrq-trigger", + Type: &hostPathFile, + }, + }, + }, + { + Name: "cgroup", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/sys/fs/cgroup", + Type: &hostPathDirectory, + }, + }, + }, + { + Name: "host", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/", + Type: &hostPathDirectory, + }, + }, + }, + }, + } + + if m.config.ImagePullSecrets != "" { + podSpec.ImagePullSecrets = []corev1.LocalObjectReference{ + { + Name: m.config.ImagePullSecrets, + }, + } + } + + return podSpec +} + +func (m *chaosPodService) removeFinalizerForChaosPod(ctx context.Context, chaosPod *corev1.Pod) error { + controllerutil.RemoveFinalizer(chaosPod, chaostypes.ChaosPodFinalizer) + + if err := m.config.Client.Update(ctx, chaosPod); err != nil { + if chaosv1beta1.IsUpdateConflictError(err) { + m.config.Log.Debugw("cannot remove chaos pod finalizer, need to re-reconcile", "error", err) + } else { + m.config.Log.Errorw("error removing chaos pod finalizer", "error", err, "chaosPod", chaosPod.Name) + } + + return err + } + + return nil +} + +func (m *chaosPodService) isFinalizerNotRemovableForChaosPod(chaosPod *corev1.Pod) bool { + switch chaosPod.Status.Phase { + case corev1.PodSucceeded, corev1.PodPending: + // we can remove the pod and the finalizer, so that it'll be garbage 
collected + return false + case corev1.PodFailed: + // we need to determine if we can remove it safely or if we need to block disruption deletion + // check if a container has been created (if not, the disruption was not injected) + if len(chaosPod.Status.ContainerStatuses) == 0 { + return false + } + + // if the pod died only because it exceeded its activeDeadlineSeconds, we can remove the finalizer + if chaosPod.Status.Reason == "DeadlineExceeded" { + return false + } + + // check if the container was able to start or not + // if not, we can safely delete the pod since the disruption was not injected + for _, cs := range chaosPod.Status.ContainerStatuses { + if cs.Name != "injector" { + continue + } + + if cs.State.Terminated != nil && cs.State.Terminated.Reason == "StartError" { + return false + } + + break + } + } + + return true +} + +func (m *chaosPodService) handleMetricSinkError(err error) { + if err != nil { + m.config.Log.Errorw("error sending a metric", "error", err) + } +} + +func (m *chaosPodService) deletePod(ctx context.Context, pod corev1.Pod) error { + // Attempt to delete the pod using the Kubernetes client. + // Ignore "not found" errors using client.IgnoreNotFound to avoid returning an error if the pod is already deleted. + if err := m.config.Client.Delete(ctx, &pod); client.IgnoreNotFound(err) != nil { + return err + } + + return nil +} diff --git a/services/chaospod_test.go b/services/chaospod_test.go new file mode 100644 index 000000000..d714285b1 --- /dev/null +++ b/services/chaospod_test.go @@ -0,0 +1,1476 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2023 Datadog, Inc. + +package services_test + +import ( + "context" + "fmt" + "net/http" + "time" + + chaosapi "github.com/DataDog/chaos-controller/api" + chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" + builderstest "github.com/DataDog/chaos-controller/builderstest" + "github.com/DataDog/chaos-controller/cloudservice" + cloudtypes "github.com/DataDog/chaos-controller/cloudservice/types" + "github.com/DataDog/chaos-controller/mocks" + "github.com/DataDog/chaos-controller/o11y/metrics" + "github.com/DataDog/chaos-controller/services" + "github.com/DataDog/chaos-controller/targetselector" + chaostypes "github.com/DataDog/chaos-controller/types" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/stretchr/testify/mock" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +const ( + DefaultNamespace = "namespace" + DefaultChaosNamespace = "chaos-namespace" + DefaultDisruptionName = "name" + DefaultTargetName = "lorem" + DefaultTargetNodeName = "ipsum" + DefaultTargetPodIp = "10.10.10.10" + DefaultHostPathDirectory = v1.HostPathDirectory + DefaultPathFile = v1.HostPathFile + DefaultImagePullSecrets = "pull-secret" + DefaultInjectorServiceAccount = "lorem" + DefaultInjectorImage = "image" + DefaultInjectorDNSDisruptionDNSServer = "8.8.8.8" + DefaultInjectorDNSDisruptionKubeDNS = "9.9.9.9" + DefaultMetricsSinkName = "name" +) + +var _ = Describe("Chaos Pod Service", func() { + + var ( + chaosPod v1.Pod + disruption *chaosv1beta1.Disruption + k8sClientMock *mocks.K8SClientMock + metricsSinkMock *metrics.SinkMock + cloudServicesProvidersManagerMock *cloudservice.CloudServicesProvidersManagerMock + targetSelectorMock *targetselector.TargetSelectorMock + chaosPodServiceConfig services.ChaosPodServiceConfig + chaosPodService services.ChaosPodService + err error + chaosPods []v1.Pod + ) + + BeforeEach(func() { + // Arrange + k8sClientMock = mocks.NewK8SClientMock(GinkgoT()) + targetSelectorMock = targetselector.NewTargetSelectorMock(GinkgoT()) + metricsSinkMock = metrics.NewSinkMock(GinkgoT()) + cloudServicesProvidersManagerMock = cloudservice.NewCloudServicesProvidersManagerMock(GinkgoT()) + disruption = &chaosv1beta1.Disruption{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultDisruptionName, + Namespace: DefaultNamespace, + }, + } + chaosPodServiceConfig = services.ChaosPodServiceConfig{} + }) + + JustBeforeEach(func() { + // Arrange + chaosPodServiceConfig.Log = logger + chaosPodServiceConfig.ChaosNamespace = DefaultChaosNamespace + chaosPodServiceConfig.MetricsSink = metricsSinkMock + chaosPodServiceConfig.TargetSelector = targetSelectorMock + chaosPodServiceConfig.CloudServicesProvidersManager = cloudServicesProvidersManagerMock + if chaosPodServiceConfig.Client == nil { + chaosPodServiceConfig.Client = k8sClientMock + } + + // Action + chaosPodService, err = services.NewChaosPodService(chaosPodServiceConfig) + Expect(err).ShouldNot(HaveOccurred()) + }) + + Describe("NewChaosPodService", func() { + Context("with valid inputs", func() { + It("should return a valid service", func() { + // Assert + By("not return an error") + Expect(err).ShouldNot(HaveOccurred()) + + By("return a valid chaosPodService") + Expect(chaosPodService).ShouldNot(BeNil()) + }) + }) + + Context("with a nil k8s client", func() { + It("should return an error", func() { + // Arrange + chaosPodServiceConfig.Client = nil + + // Action + chaosPodService, err := services.NewChaosPodService(chaosPodServiceConfig) + + // Assert + By("return an error") + Expect(err).Should(HaveOccurred()) + Expect(err).To(MatchError("you must provide a non nil Kubernetes client")) + + By("not return a chaos pod service") + Expect(chaosPodService).To(BeNil()) + }) + }) + }) + + Describe("GetChaosPodsOfDisruption", func() { + + var ( + labelSets labels.Set + ) + + BeforeEach(func() { + // Arrange + labelSets = labels.Set{} + }) + + JustBeforeEach(func() { + // Action + 
chaosPods, err = chaosPodService.GetChaosPodsOfDisruption(context.Background(), disruption, labelSets)
+		})
+
+		Context("with three pods", func() {
+
+			var (
+				firstChaosPod, secondChaosPod, nonChaosPod v1.Pod
+				nonChaosPodName                            = "pod-3"
+				chaosPodsObjects                           = []client.Object{
+					&firstChaosPod,
+					&secondChaosPod,
+					&nonChaosPod,
+				}
+				fakeClient client.Client
+			)
+
+			BeforeEach(func() {
+				// Arrange
+				firstChaosPod = builderstest.NewPodBuilder("pod-1", DefaultChaosNamespace).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").Build()
+				secondChaosPod = builderstest.NewPodBuilder("pod-2", DefaultChaosNamespace).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").Build()
+				nonChaosPod = builderstest.NewPodBuilder(nonChaosPodName, DefaultChaosNamespace).Build()
+
+				fakeClient = fake.NewClientBuilder().WithObjects(chaosPodsObjects...).Build()
+				chaosPodServiceConfig.Client = fakeClient
+			})
+
+			DescribeTable("success cases", func(ls labels.Set) {
+				// Arrange
+				chaosPodService, err := services.NewChaosPodService(chaosPodServiceConfig)
+				Expect(err).ShouldNot(HaveOccurred())
+
+				// Action
+				chaosPods, err := chaosPodService.GetChaosPodsOfDisruption(context.Background(), disruption, ls)
+
+				// Assert
+				By("not return an error")
+				Expect(err).ShouldNot(HaveOccurred())
+
+				By("return a list of two pods")
+				Expect(chaosPods).ToNot(BeEmpty())
+				Expect(chaosPods).Should(HaveLen(2))
+
+				for _, chaosPod := range chaosPods {
+					Expect(chaosPod.Name).ToNot(Equal(nonChaosPodName))
+					Expect(chaosPod.Namespace).Should(Equal(DefaultChaosNamespace))
+					Expect(chaosPod.Labels[chaostypes.DisruptionNameLabel]).Should(Equal(DefaultDisruptionName))
+					Expect(chaosPod.Labels[chaostypes.DisruptionNamespaceLabel]).Should(Equal(DefaultNamespace))
+				}
+			},
+				Entry("with an empty label set",
+					labels.Set{},
+				),
+				Entry("with a nil label set",
+					nil,
+				),
+			)
+
+			Context("with a nil disruption and an empty label set", func() {
+
+				BeforeEach(func() {
+					// Arrange
+					disruption = nil
+					labelSets = labels.Set{}
+				})
+
+				Describe("success cases", func() {
+					It("should return a list of all chaos pods", func() {
+						// Assert
+						By("not return an error")
+						Expect(err).ShouldNot(HaveOccurred())
+
+						By("return a list of all pods")
+						Expect(chaosPods).ToNot(BeEmpty())
+						Expect(chaosPods).Should(HaveLen(len(chaosPodsObjects)))
+					})
+				})
+			})
+		})
+
+		Describe("error cases", func() {
+			When("the k8s client returns an error", func() {
+
+				BeforeEach(func() {
+					// Arrange
+					k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, mock.Anything).Return(fmt.Errorf("error"))
+				})
+
+				It("should propagate the error", func() {
+					// Assert
+					By("return the error")
+					Expect(err).Should(HaveOccurred())
+					Expect(err).To(MatchError("error listing owned pods: error"))
+
+					By("return an empty list of chaos pods")
+					Expect(chaosPods).To(BeEmpty())
+				})
+			})
+		})
+	})
+
+	Describe("HandleChaosPodTermination", func() {
+
+		var (
+			isFinalizerRemoved bool
+			cpBuilder          *builderstest.ChaosPodBuilder
+		)
+
+		BeforeEach(func() {
+			// Arrange
+			cpBuilder = builderstest.NewPodBuilder("test-1", DefaultChaosNamespace)
+			targetSelectorMock.EXPECT().TargetIsHealthy(mock.Anything, mock.Anything, mock.Anything).Return(nil).Maybe()
+		})
+
+		JustBeforeEach(func() {
+			// Arrange
+			chaosPod = cpBuilder.Build()
+
+			// Action
+			isFinalizerRemoved, err = chaosPodService.HandleChaosPodTermination(context.Background(), disruption, &chaosPod)
+		})
+
+		DescribeTable("success cases", func(chaosPodBuilder *builderstest.ChaosPodBuilder) {
+			// Arrange
+			
chaosPod := chaosPodBuilder.WithDeletion().WithChaosFinalizer().Build()
+			target := chaosPod.Labels[chaostypes.TargetLabel]
+
+			By("update the chaos pod object without the finalizer")
+			k8sClientMock.EXPECT().Update(mock.Anything, &chaosPod).Return(nil)
+
+			By("check if the target is healthy")
+			targetSelectorMock.ExpectedCalls = nil
+			targetSelectorMock.EXPECT().TargetIsHealthy(target, k8sClientMock, disruption).Return(nil)
+
+			chaosPodService, err := services.NewChaosPodService(chaosPodServiceConfig)
+			Expect(err).ShouldNot(HaveOccurred())
+
+			// Action
+			isRemoved, err := chaosPodService.HandleChaosPodTermination(context.Background(), disruption, &chaosPod)
+
+			// Assert
+			By("not return an error")
+			Expect(err).ShouldNot(HaveOccurred())
+
+			By("remove the finalizer")
+			Expect(chaosPod.GetFinalizers()).Should(Equal([]string{}))
+			Expect(isRemoved).To(BeTrue())
+		},
+			Entry(
+				"with a succeeded pod",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithStatusPhase(v1.PodSucceeded)),
+			Entry(
+				"with a pending pod",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithStatusPhase(v1.PodPending)),
+			Entry(
+				"with a failed pod",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithStatusPhase(v1.PodFailed)),
+			Entry(
+				"with a failed pod exceeding its activeDeadlineSeconds",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithStatus(v1.PodStatus{
+					Phase:             v1.PodFailed,
+					Reason:            "DeadlineExceeded",
+					ContainerStatuses: []v1.ContainerStatus{{}},
+				})),
+			Entry(
+				"with a failed pod and an injector container in error state",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithStatus(v1.PodStatus{
+					Phase: v1.PodFailed,
+					ContainerStatuses: []v1.ContainerStatus{
+						{
+							Name: "injector",
+							State: v1.ContainerState{
+								Terminated: &v1.ContainerStateTerminated{
+									Reason: "StartError",
+								},
+							},
+						},
+					},
+				})),
+			Entry(
+				"with a pod and an injector container in error state",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithStatus(v1.PodStatus{
+					Phase: v1.PodFailed,
+					ContainerStatuses: []v1.ContainerStatus{
+						{
+							Name: "injector",
+							State: v1.ContainerState{
+								Terminated: &v1.ContainerStateTerminated{
+									Reason: "StartError",
+								},
+							},
+						},
+					},
+				})),
+			Entry(
+				"with a running node failure chaos pod",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", chaostypes.DisruptionKindNodeFailure).WithStatusPhase(v1.PodRunning)),
+			Entry(
+				"with a running container failure chaos pod",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", chaostypes.DisruptionKindContainerFailure).WithStatusPhase(v1.PodRunning)),
+		)
+
+		DescribeTable("failures", func(chaosPod v1.Pod) {
+			// Arrange
+			target := chaosPod.Labels[chaostypes.TargetLabel]
+
+			By("check if the target is healthy")
+			targetSelectorMock.EXPECT().TargetIsHealthy(target, k8sClientMock, disruption).Return(nil)
+
+			chaosPodService, err := services.NewChaosPodService(chaosPodServiceConfig)
+			Expect(err).ShouldNot(HaveOccurred())
+
+			// Action
+			isRemoved, err := chaosPodService.HandleChaosPodTermination(context.Background(), disruption, &chaosPod)
+
+			// Assert
+			By("not return an error")
+			Expect(err).ShouldNot(HaveOccurred())
+
+			By("not update the chaos pod object")
+			k8sClientMock.AssertNotCalled(GinkgoT(), "Update")
+
+			By("not remove the finalizer")
+			Expect(chaosPod.GetFinalizers()).Should(Equal([]string{chaostypes.ChaosPodFinalizer}))
+			Expect(isRemoved).To(BeFalse())
+		},
+			Entry("with a running pod",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").WithDeletion().WithChaosFinalizer().WithStatusPhase(v1.PodRunning).Build()),
+			Entry("with a failed pod with containers",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").WithDeletion().WithChaosFinalizer().WithStatusPhase(
+					v1.PodFailed,
+				).WithContainerStatuses([]v1.ContainerStatus{{Name: "test-1"}}).Build()),
+			Entry("with a failed pod with containers and a running injector",
+				builderstest.NewPodBuilder(
+					"test",
+					DefaultNamespace,
+				).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").WithDeletion().WithChaosFinalizer().WithStatusPhase(
+					v1.PodFailed,
+				).WithContainerStatuses([]v1.ContainerStatus{{Name: "injector"}}).Build()),
+		)
+
+		Context("with a chaos pod ready to be deleted", func() {
+
+			BeforeEach(func() {
+				// Arrange
+				cpBuilder.WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").WithDeletion().WithChaosFinalizer()
+			})
+
+			Describe("success cases", func() {
+				DescribeTable("when the TargetIsHealthy check returns an allowed error", func(targetErrStatus metav1.Status) {
+					// Arrange
+					By("check if the target is healthy")
+					errorStatus := errors.StatusError{ErrStatus: targetErrStatus}
+
+					targetSelectorMock.ExpectedCalls = nil
+					targetSelectorMock.EXPECT().TargetIsHealthy(mock.Anything, mock.Anything, mock.Anything).Return(&errorStatus)
+
+					By("update the chaos pod object without the finalizer")
+					k8sClientMock.EXPECT().Update(mock.Anything, &chaosPod).Return(nil)
+
+					chaosPodService, err := services.NewChaosPodService(chaosPodServiceConfig)
+					Expect(err).ShouldNot(HaveOccurred())
+
+					// Action
+					isRemoved, err := chaosPodService.HandleChaosPodTermination(context.Background(), disruption, &chaosPod)
+
+					// Assert
+					By("not return an error")
+					Expect(err).ShouldNot(HaveOccurred())
+
+					By("remove the finalizer")
+					Expect(chaosPod.GetFinalizers()).Should(Equal([]string{}))
+					Expect(isRemoved).To(BeTrue())
+				},
+					Entry("not found target", metav1.Status{
+						Message: "Not found",
+						Reason:  metav1.StatusReasonNotFound,
+						Code:    http.StatusNotFound,
+					}),
+					Entry("pod is not running", metav1.Status{
+						Message: "pod is not running",
+					}),
+					Entry("node is not ready", metav1.Status{
+						Message: "node is not ready",
+					}))
+			})
+
+			Describe("error cases", func() {
+				When("the target healthiness check returns an unexpected error", func() {
+
+					BeforeEach(func() {
+						// Arrange
+						targetSelectorMock.ExpectedCalls = nil
+						targetSelectorMock.EXPECT().TargetIsHealthy(mock.Anything, mock.Anything, mock.Anything).Return(fmt.Errorf("an error happened"))
+					})
+
+					It("should not remove the finalizer", func() {
+						// Assert
+						By("return an error")
+						Expect(err).Should(HaveOccurred())
+
+						By("not update the chaos pod object")
+						k8sClientMock.AssertNotCalled(GinkgoT(), "Update")
+
+						By("not remove the finalizer")
+						Expect(chaosPod.GetFinalizers()).Should(Equal([]string{chaostypes.ChaosPodFinalizer}))
+						Expect(isFinalizerRemoved).To(BeFalse())
+					})
+
+				})
+
+				When("the removeFinalizerForChaosPod returns an error", func() {
+
+					BeforeEach(func() {
+						// Arrange
+						errorStatus := errors.StatusError{ErrStatus: metav1.Status{
+							Message: "node is not ready",
+						}}
+						targetSelectorMock.ExpectedCalls = nil
+						
targetSelectorMock.EXPECT().TargetIsHealthy(mock.Anything, mock.Anything, mock.Anything).Return(&errorStatus)
+
+						k8sClientMock.EXPECT().Update(mock.Anything, mock.Anything).Return(fmt.Errorf("an error happened"))
+					})
+
+					It("should not remove the finalizer", func() {
+						// Assert
+						By("return an error")
+						Expect(err).Should(HaveOccurred())
+
+						By("not remove the finalizer")
+						Expect(isFinalizerRemoved).To(BeFalse())
+					})
+				})
+
+			})
+
+			Context("with a succeeded pod", func() {
+
+				BeforeEach(func() {
+					// Arrange
+					cpBuilder.WithStatusPhase(v1.PodSucceeded)
+				})
+
+				When("the k8s client returns an error during the update", func() {
+
+					BeforeEach(func() {
+						// Arrange
+						k8sClientMock.EXPECT().Update(mock.Anything, mock.Anything, mock.Anything).Return(fmt.Errorf("could not update"))
+					})
+
+					It("should propagate the error", func() {
+						// Assert
+						Expect(err).Should(HaveOccurred())
+						Expect(isFinalizerRemoved).To(BeFalse())
+					})
+				})
+
+			})
+
+			Context("with a running node failure chaos pod", func() {
+				BeforeEach(func() {
+					// Arrange
+					cpBuilder.WithStatusPhase(v1.PodRunning).WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", chaostypes.DisruptionKindNodeFailure)
+				})
+
+				When("the k8s client returns an error during the update", func() {
+
+					BeforeEach(func() {
+						// Arrange
+						k8sClientMock.EXPECT().Update(mock.Anything, mock.Anything, mock.Anything).Return(fmt.Errorf("could not update"))
+					})
+
+					It("should propagate the error", func() {
+						// Assert
+						Expect(err).Should(HaveOccurred())
+						Expect(isFinalizerRemoved).To(BeFalse())
+					})
+				})
+			})
+		})
+
+		Context("with a chaos pod not being deleted", func() {
+
+			BeforeEach(func() {
+				// Arrange
+				cpBuilder.WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").WithChaosFinalizer()
+			})
+
+			It("should not remove the finalizer", func() {
+				// Assert
+				By("not return an error")
+				Expect(err).ShouldNot(HaveOccurred())
+
+				By("not remove the finalizer")
+				k8sClientMock.AssertNotCalled(GinkgoT(), "Update", mock.Anything, mock.Anything)
+				Expect(isFinalizerRemoved).To(BeFalse())
+			})
+		})
+
+		Context("with a chaos pod without finalizer", func() {
+
+			BeforeEach(func() {
+				// Arrange
+				cpBuilder.WithChaosPodLabels(DefaultDisruptionName, DefaultNamespace, "", "").WithDeletion().Build()
+			})
+
+			It("should not remove the finalizer", func() {
+				// Assert
+				By("not return an error")
+				Expect(err).ShouldNot(HaveOccurred())
+
+				By("not try to remove the finalizer because it is already removed")
+				k8sClientMock.AssertNotCalled(GinkgoT(), "Update", mock.Anything, mock.Anything)
+
+				By("return true because the finalizer is already removed")
+				Expect(isFinalizerRemoved).To(BeTrue())
+			})
+		})
+	})
+
+	Describe("DeletePod", func() {
+
+		var (
+			pod       v1.Pod
+			isDeleted bool
+		)
+
+		JustBeforeEach(func() {
+			// Action
+			isDeleted = chaosPodService.DeletePod(context.Background(), pod)
+		})
+
+		Context("with a pod not marked to be deleted", func() {
+
+			BeforeEach(func() {
+				// Arrange
+				pod = builderstest.NewPodBuilder("test", DefaultNamespace).Build()
+			})
+
+			Describe("success cases", func() {
+
+				Context("nominal case", func() {
+					BeforeEach(func() {
+						// Arrange
+						By("tell the k8s client to delete the pod")
+						k8sClientMock.EXPECT().Delete(mock.Anything, &pod).Return(nil)
+					})
+
+					It("should return true", func() {
+						Expect(isDeleted).To(BeTrue())
+					})
+				})
+
+				When("the k8s client returns a not found error", func() {
+
+					BeforeEach(func() {
+						// Arrange
+						errorNotFound := errors.StatusError{
+							ErrStatus: metav1.Status{
+								Message: "Not found",
+								Reason:  metav1.StatusReasonNotFound,
+								Code:    http.StatusNotFound,
+							},
+						}
+						k8sClientMock.EXPECT().Delete(mock.Anything, &pod).Return(&errorNotFound)
+					})
+
+					It("should return true", func() {
+						// Assert
+						Expect(isDeleted).To(BeTrue())
+					})
+				})
+			})
+
+			Describe("error cases", func() {
+				When("the k8s client returns an error during the delete", func() {
+
+					BeforeEach(func() {
+						// Arrange
+						k8sClientMock.EXPECT().Delete(mock.Anything, &pod).Return(fmt.Errorf("an error happened"))
+					})
+
+					It("should return false", func() {
+						// Assert
+						Expect(isDeleted).To(BeFalse())
+					})
+				})
+			})
+		})
+	})
+
+	Describe("GenerateChaosPodsOfDisruption", func() {
+
+		var (
+			targetContainers                             map[string]string
+			DefaultInjectorNetworkDisruptionAllowedHosts []string
+			dBuilder                                     *builderstest.DisruptionBuilder
+			args                                         chaosapi.DisruptionArgs
+			expectedArgs                                 []string
+			disruptionKindName                           chaostypes.DisruptionKindName
+		)
+
+		BeforeEach(func() {
+			// Arrange
+			dBuilder = builderstest.NewDisruptionBuilder()
+			targetContainers = map[string]string{"test": "test"}
+			DefaultInjectorNetworkDisruptionAllowedHosts = []string{"10.10.10.10", "11.11.11.11"}
+			chaosPodServiceConfig.Injector = services.ChaosPodServiceInjectorConfig{
+				NetworkDisruptionAllowedHosts: DefaultInjectorNetworkDisruptionAllowedHosts,
+				DNSDisruptionDNSServer:        DefaultInjectorDNSDisruptionDNSServer,
+				DNSDisruptionKubeDNS:          DefaultInjectorDNSDisruptionKubeDNS,
+			}
+			pulseActiveDuration, pulseDormantDuration, pulseInitialDelay := time.Duration(0), time.Duration(0), time.Duration(0)
+			args = chaosapi.DisruptionArgs{
+				Level:                disruption.Spec.Level,
+				TargetContainers:     targetContainers,
+				TargetName:           DefaultTargetName,
+				TargetNodeName:       DefaultTargetNodeName,
+				TargetPodIP:          DefaultTargetPodIp,
+				DryRun:               disruption.Spec.DryRun,
+				DisruptionName:       disruption.Name,
+				DisruptionNamespace:  DefaultNamespace,
+				OnInit:               disruption.Spec.OnInit,
+				PulseInitialDelay:    pulseInitialDelay,
+				PulseActiveDuration:  pulseActiveDuration,
+				PulseDormantDuration: pulseDormantDuration,
+				MetricsSink:          DefaultMetricsSinkName,
+				AllowedHosts:         DefaultInjectorNetworkDisruptionAllowedHosts,
+				DNSServer:            DefaultInjectorDNSDisruptionDNSServer,
+				KubeDNS:              DefaultInjectorDNSDisruptionKubeDNS,
+				ChaosNamespace:       DefaultChaosNamespace,
+			}
+			
metricsSinkMock.EXPECT().GetSinkName().Return(DefaultMetricsSinkName).Maybe()
+		})
+
+		JustBeforeEach(func() {
+			// Arrange
+			if disruptionKindName == "" {
+				return
+			}
+
+			disruption := dBuilder.WithDisruptionKind(disruptionKindName).WithNamespace(DefaultNamespace).Build()
+
+			notInjectedBefore := disruption.TimeToInject()
+
+			subSpec := disruption.Spec.DisruptionKindPicker(disruptionKindName)
+
+			args.Kind = disruptionKindName
+			args.Level = disruption.Spec.Level
+			args.TargetContainers = targetContainers
+			args.DryRun = disruption.Spec.DryRun
+			args.DisruptionName = disruption.Name
+			args.OnInit = disruption.Spec.OnInit
+			args.NotInjectedBefore = notInjectedBefore
+
+			expectedArgs = args.CreateCmdArgs(subSpec.GenerateArgs())
+			expectedArgs = append(expectedArgs, "--deadline", time.Now().Add(disruption.RemainingDuration()).Format(time.RFC3339))
+
+			// Action
+			chaosPods, err = chaosPodService.GenerateChaosPodsOfDisruption(&disruption, DefaultTargetName, DefaultTargetNodeName, targetContainers, DefaultTargetPodIp)
+		})
+
+		Describe("success cases", func() {
+
+			DescribeTable("success cases", func(disruption chaosv1beta1.Disruption, expectedNumberOfChaosPods int) {
+				// Arrange
+				metricsSinkMock.EXPECT().GetSinkName().Return(DefaultMetricsSinkName).Maybe()
+
+				chaosPodService, err := services.NewChaosPodService(chaosPodServiceConfig)
+				Expect(err).ShouldNot(HaveOccurred())
+
+				// Action
+				chaosPods, err := chaosPodService.GenerateChaosPodsOfDisruption(&disruption, DefaultTargetName, DefaultTargetNodeName, targetContainers, DefaultTargetPodIp)
+
+				// Assert
+				By("not return an error")
+				Expect(err).ShouldNot(HaveOccurred())
+
+				By("return the expected number of chaos pods")
+				Expect(chaosPods).To(HaveLen(expectedNumberOfChaosPods))
+			},
+				Entry("disruption with two kinds",
+					builderstest.NewDisruptionBuilder().WithDisruptionKind(
+						chaostypes.DisruptionKindNodeFailure,
+					).WithDisruptionKind(
+						chaostypes.DisruptionKindDiskFailure,
+					).Build(),
+					2,
+				), Entry("disruption with one kind",
+					builderstest.NewDisruptionBuilder().WithDisruptionKind(
+						chaostypes.DisruptionKindNodeFailure,
+					).Build(),
+					1,
+				),
+				Entry("without disruption", nil, 0),
+			)
+
+			Context("with a disk failure disruption", func() {
+
+				BeforeEach(func() {
+					// Arrange
+					disruptionKindName = chaostypes.DisruptionKindDiskFailure
+				})
+
+				It("should succeed", func() {
+					// Assert
+					By("not return an error")
+					Expect(err).ShouldNot(HaveOccurred())
+
+					By("return only one pod")
+					Expect(chaosPods).To(HaveLen(1))
+
+					By("having the correct container arguments")
+					Expect(chaosPods[0].Spec.Containers[0].Args).Should(Equal(expectedArgs))
+				})
+			})
+
+			Context("with a network disruption with a DisableDefaultAllowedHosts", func() {
+				BeforeEach(func() {
+					// Arrange
+					disruptionKindName = chaostypes.DisruptionKindNetworkDisruption
+
+					dBuilder.WithNetworkDisableDefaultAllowedHosts(true)
+
+					args.AllowedHosts = make([]string, 0)
+				})
+
+				It("should succeed", func() {
+					// Assert
+					By("not return an error")
+					Expect(err).ShouldNot(HaveOccurred())
+
+					By("return only one pod")
+					Expect(chaosPods).To(HaveLen(1))
+
+					By("having the correct container arguments")
+					Expect(chaosPods[0].Spec.Containers[0].Args).Should(Equal(expectedArgs))
+				})
+
+				Context("with a network cloud spec", func() {
+
+					var serviceName string
+
+					BeforeEach(func() {
+						// Arrange
+						serviceName = "GCP"
+
+						cloudSpec := &chaosv1beta1.NetworkDisruptionCloudSpec{
+							GCPServiceList: &[]chaosv1beta1.NetworkDisruptionCloudServiceSpec{
+								{
+									ServiceName: serviceName,
+									Protocol:    "TCP",
+									Flow:        "ingress",
+									ConnState:   "open",
+								},
+							},
+						}
+
+						dBuilder.WithNetworkDisruptionCloudSpec(cloudSpec)
+					})
+
+					Context("nominal cases", func() {
+
+						BeforeEach(func() {
+							// Arrange
+							cloudServicesProvidersManagerMock.EXPECT().GetServicesIPRanges(
+								cloudtypes.CloudProviderName(serviceName),
+								[]string{serviceName},
+							).Return(map[string][]string{
+								serviceName: {
+									"10.0.0.0-10.10.10.10",
+								},
+							}, nil).Once()
+						})
+
+						It("should succeed", func() {
+							// Assert
+							By("not return an error")
+							Expect(err).ShouldNot(HaveOccurred())
+
+							By("return only one pod")
+							Expect(chaosPods).To(HaveLen(1))
+
+							By("having the correct cloud service args")
+							Expect(chaosPods[0].Spec.Containers[0].Args).Should(ContainElements("--hosts", "10.0.0.0-10.10.10.10;0;TCP;ingress;open"))
+						})
+					})
+
+					When("the cloud manager returns an error while fetching the services IP ranges", func() {
+						BeforeEach(func() {
+							// Arrange
+							cloudServicesProvidersManagerMock.EXPECT().GetServicesIPRanges(
+								mock.Anything,
+								mock.Anything,
+							).Return(nil, fmt.Errorf("an error happened"))
+						})
+
+						It("should propagate the error", func() {
+							Expect(err).Should(HaveOccurred())
+						})
+					})
+				})
+
+				Context("with a Pulse Spec", 
func() { + + BeforeEach(func() { + // Arrange + pulseActiveDuration, pulseDormantDuration, pulseInitialDelay := time.Duration(10), time.Duration(11), time.Duration(12) + + dBuilder.WithSpecPulse(&chaosv1beta1.DisruptionPulse{ + ActiveDuration: chaosv1beta1.DisruptionDuration(pulseActiveDuration.String()), + DormantDuration: chaosv1beta1.DisruptionDuration(pulseDormantDuration.String()), + InitialDelay: chaosv1beta1.DisruptionDuration(pulseInitialDelay.String()), + }) + + args.PulseActiveDuration = pulseActiveDuration + args.PulseDormantDuration = pulseDormantDuration + args.PulseInitialDelay = pulseInitialDelay + }) + + It("should succeed", func() { + // Assert + By("not return an error") + Expect(err).ShouldNot(HaveOccurred()) + + By("return only one pod") + Expect(chaosPods).To(HaveLen(1)) + + By("having the correct container arguments") + Expect(chaosPods[0].Spec.Containers[0].Args).Should(Equal(expectedArgs)) + }) + }) + }) + }) + }) + + Describe("GenerateChaosPodOfDisruption", func() { + var ( + DefaultTerminationGracePeriod int64 + DefaultActiveDeadlineSeconds int64 + DefaultExpectedArgs []string + DefaultInjectorAnnotation map[string]string + DefaultInjectorLabels map[string]string + EmptyInjectorLabels map[string]string + ) + + BeforeEach(func() { + // Arrange + DefaultTerminationGracePeriod = int64(60) + DefaultActiveDeadlineSeconds = int64(disruption.RemainingDuration().Seconds()) + 10 + DefaultExpectedArgs = []string{ + "toto", + "--deadline", time.Now().Add(disruption.RemainingDuration()).Format(time.RFC3339), + } + DefaultInjectorAnnotation = map[string]string{ + "lorem": "ipsum", + } + DefaultInjectorLabels = map[string]string{ + "ipsum": "dolores", + } + EmptyInjectorLabels = map[string]string{} + }) + + DescribeTable("success cases", func(expectedPodBuilder *builderstest.ChaosPodBuilder, expectedLabels map[string]string) { + // Arrange + expectedChaosPod := expectedPodBuilder.WithChaosSpec( + DefaultTargetNodeName, + DefaultTerminationGracePeriod, + DefaultActiveDeadlineSeconds, + DefaultExpectedArgs, + DefaultHostPathDirectory, + DefaultPathFile, + DefaultInjectorServiceAccount, + DefaultInjectorImage, + ).Build() + + imagePullSecrets := "" + if expectedChaosPod.Spec.ImagePullSecrets != nil { + imagePullSecrets = DefaultImagePullSecrets + } + + chaosPodServiceConfig.Injector = services.ChaosPodServiceInjectorConfig{ + ServiceAccount: DefaultInjectorServiceAccount, + Image: DefaultInjectorImage, + Annotations: DefaultInjectorAnnotation, + Labels: DefaultInjectorLabels, + ImagePullSecrets: imagePullSecrets, + } + + chaosPodService, err := services.NewChaosPodService(chaosPodServiceConfig) + Expect(err).ShouldNot(HaveOccurred()) + + args := []string{"toto"} + kind := chaostypes.DisruptionKindNames[0] + + // Action + chaosPod := chaosPodService.GenerateChaosPodOfDisruption(disruption, DefaultTargetName, DefaultTargetNodeName, args, kind) + + // Arrange + // Remove containers args to avoid error due to a time.Now() which can diverge and create false negative results. 
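+			// (The "--deadline" argument embeds time.Now() at generation time, so
+			// two generations a few nanoseconds apart would otherwise compare unequal.)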
+ for key := range chaosPod.Spec.Containers { + chaosPod.Spec.Containers[key].Args = nil + expectedChaosPod.Spec.Containers[key].Args = nil + } + + // Assert + By("return the expected spec") + Expect(chaosPod.Spec).Should(Equal(expectedChaosPod.Spec)) + + By("return the correct object meta") + Expect(chaosPod.ObjectMeta.GenerateName).Should(Equal(fmt.Sprintf("chaos-%s-", DefaultDisruptionName))) + Expect(chaosPod.ObjectMeta.Namespace).Should(Equal(DefaultChaosNamespace)) + Expect(chaosPod.ObjectMeta.Annotations).Should(Equal(DefaultInjectorAnnotation)) + Expect(chaosPod.ObjectMeta.Labels[chaostypes.TargetLabel]).Should(Equal(DefaultTargetName)) + Expect(chaosPod.ObjectMeta.Labels[chaostypes.DisruptionKindLabel]).Should(Equal(string(kind))) + Expect(chaosPod.ObjectMeta.Labels[chaostypes.DisruptionNameLabel]).Should(Equal(DefaultDisruptionName)) + Expect(chaosPod.ObjectMeta.Labels[chaostypes.DisruptionNamespaceLabel]).Should(Equal(DefaultNamespace)) + for name, value := range expectedLabels { + Expect(chaosPod.ObjectMeta.Labels[name]).Should(Equal(value)) + } + + By("add the finalizer") + Expect(controllerutil.ContainsFinalizer(&chaosPod, chaostypes.ChaosPodFinalizer)).To(BeTrue()) + }, + Entry("chaos pod without image pull secrets", + builderstest.NewPodBuilder( + "pod-1", + DefaultChaosNamespace, + ), + EmptyInjectorLabels), + Entry("chaos pod with image pull secrets", + builderstest.NewPodBuilder( + "pod-1", + DefaultChaosNamespace, + ).WithPullSecrets([]v1.LocalObjectReference{ + { + Name: DefaultImagePullSecrets, + }, + }), + EmptyInjectorLabels), + Entry("chaos pod with injector labels", + builderstest.NewPodBuilder( + "pod-1", + DefaultChaosNamespace, + ).WithLabels(DefaultInjectorLabels), + DefaultInjectorLabels), + ) + }) + + Describe("GetPodInjectorArgs", func() { + + var chaosPodArgs []string + + JustBeforeEach(func() { + // Action + chaosPodArgs = chaosPodService.GetPodInjectorArgs(chaosPod) + }) + + Describe("success cases", func() { + Context("with a single chaos pod", func() { + + Context("with a single container with args", func() { + BeforeEach(func() { + // Arrange + chaosPod = builderstest.NewPodBuilder("test-1", DefaultNamespace).WithChaosSpec( + DefaultTargetNodeName, + int64(60), + int64(60), + []string{ + "1", + "2", + }, + DefaultHostPathDirectory, + DefaultPathFile, + DefaultInjectorServiceAccount, + DefaultInjectorImage, + ).Build() + }) + + It("should return the chaos pod args", func() { + // Assert + Expect(chaosPodArgs).Should(Equal([]string{ + "1", + "2", + })) + }) + }) + + Context("without container", func() { + BeforeEach(func() { + // Arrange + chaosPod = builderstest.NewPodBuilder("test-1", DefaultNamespace).Build() + }) + + It("should return an empty args", func() { + // Assert + Expect(chaosPodArgs).Should(Equal([]string{})) + }) + }) + + }) + }) + }) + + Describe("CreatePod", func() { + BeforeEach(func() { + // Arrange + chaosPod = builderstest.NewPodBuilder("test-1", DefaultNamespace).Build() + }) + + JustBeforeEach(func() { + // Action + err = chaosPodService.CreatePod(context.Background(), &chaosPod) + }) + + Describe("success case", func() { + BeforeEach(func() { + // Arrange + By("create the chaos pod with the ks8 client") + k8sClientMock.EXPECT().Create(mock.Anything, &chaosPod).Return(nil) + }) + + It("should not return an error", func() { + // Assert + Expect(err).ShouldNot(HaveOccurred()) + }) + }) + + Describe("error cases", func() { + When("the k8s client return an error during the create", func() { + + BeforeEach(func() { + // Arrange + 
By("create the chaos pod with the ks8 client") + k8sClientMock.EXPECT().Create(mock.Anything, &chaosPod).Return(fmt.Errorf("an error happened")) + }) + + It("should propagate the error", func() { + // Assert + Expect(err).Should(HaveOccurred()) + }) + }) + }) + }) + + Describe("WaitForPodCreation", func() { + JustBeforeEach(func() { + // Action + err = chaosPodService.WaitForPodCreation(context.Background(), chaosPod) + }) + + Context("with a single pod", func() { + + BeforeEach(func() { + // Arrange + chaosPod = builderstest.NewPodBuilder("test-1", DefaultNamespace).Build() + }) + + Describe("success cases", func() { + + BeforeEach(func() { + // Arrange + By("call the Get method of the k8s client") + errorStatus := errors.StatusError{ + ErrStatus: metav1.Status{ + Message: "Not found", + Reason: metav1.StatusReasonNotFound, + Code: http.StatusNotFound, + }, + } + k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{Namespace: chaosPod.Namespace, Name: chaosPod.Name}, &chaosPod).Return(&errorStatus).Once() + k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{Namespace: chaosPod.Namespace, Name: chaosPod.Name}, &chaosPod).Return(nil).Once() + }) + + It("should not return an error", func() { + // Assert + Expect(err).ShouldNot(HaveOccurred()) + }) + }) + + Describe("error cases", func() { + + When("the Get method of the k8s client an error", func() { + + BeforeEach(func() { + // Arrange + By("call the Get method of the k8s client") + k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{Namespace: chaosPod.Namespace, Name: chaosPod.Name}, &chaosPod).Return(fmt.Errorf("")).Once() + }) + + It("should return an error", func() { + // Assert + Expect(err).Should(HaveOccurred()) + }) + }) + + }) + }) + }) + + Describe("HandleOrphanedChaosPods", func() { + var ( + DefaultLs map[string]string + DefaultReq ctrl.Request + ) + + BeforeEach(func() { + // Arrange + metricsSinkMock.EXPECT().MetricOrphanFound(mock.Anything).Return(nil).Maybe() + DefaultLs = map[string]string{ + chaostypes.DisruptionNameLabel: DefaultDisruptionName, + chaostypes.DisruptionNamespaceLabel: DefaultNamespace, + } + DefaultReq = ctrl.Request{ + NamespacedName: types.NamespacedName{ + Namespace: DefaultNamespace, + Name: DefaultDisruptionName, + }, + } + }) + + JustBeforeEach(func() { + // Action + err = chaosPodService.HandleOrphanedChaosPods(context.Background(), DefaultReq) + }) + + Describe("success cases", func() { + + Context("with three chaos pods", func() { + BeforeEach(func() { + // Arrange + chaosPods = []v1.Pod{ + builderstest.NewPodBuilder("test-1", DefaultNamespace).WithChaosFinalizer().WithChaosPodLabels(DefaultDisruptionName, DefaultDisruptionName, DefaultTargetName, chaostypes.DisruptionKindDiskFailure).Build(), + builderstest.NewPodBuilder("test-2", DefaultNamespace).WithChaosFinalizer().WithChaosPodLabels(DefaultDisruptionName, DefaultDisruptionName, DefaultTargetName, chaostypes.DisruptionKindDiskFailure).Build(), + builderstest.NewPodBuilder("test-3", DefaultNamespace).WithChaosFinalizer().WithChaosPodLabels(DefaultDisruptionName, DefaultDisruptionName, DefaultTargetName, chaostypes.DisruptionKindDiskFailure).Build(), + } + }) + + Context("nominal cases", func() { + + BeforeEach(func() { + // Arrange + By("list the existing chaos pods matching criteria") + k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, &client.ListOptions{ + Namespace: DefaultChaosNamespace, + LabelSelector: labels.SelectorFromValidatedSet(DefaultLs), + }).Return(nil).Run(func(ctx context.Context, list 
+ list.(*v1.PodList).Items = chaosPods
+ }).Once()
+
+ By("check if the target exists for each chaos pod")
+ k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{
+ Namespace: DefaultNamespace,
+ Name: DefaultTargetName,
+ }, mock.Anything).Return(&errors.StatusError{ErrStatus: metav1.Status{
+ Message: "Not found",
+ Reason: metav1.StatusReasonNotFound,
+ Code: http.StatusNotFound,
+ }}).Times(3)
+
+ for _, pod := range chaosPods {
+ By("remove the finalizer of all chaos pods")
+ podWithoutFinalizer := pod.DeepCopy()
+ controllerutil.RemoveFinalizer(podWithoutFinalizer, chaostypes.ChaosPodFinalizer)
+ k8sClientMock.EXPECT().Update(mock.Anything, podWithoutFinalizer).Return(nil).Once()
+
+ By("remove all chaos pods")
+ k8sClientMock.EXPECT().Delete(mock.Anything, podWithoutFinalizer).Return(nil).Once()
+ }
+ })
+
+ It("should remove orphan chaos pods", func() {
+ // Assert
+ Expect(err).ShouldNot(HaveOccurred())
+ })
+ })
+
+ When("an error occurs while removing the finalizer from all chaos pods", func() {
+
+ BeforeEach(func() {
+ // Arrange
+ By("list the existing chaos pods matching criteria")
+ k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, &client.ListOptions{
+ Namespace: DefaultChaosNamespace,
+ LabelSelector: labels.SelectorFromValidatedSet(DefaultLs),
+ }).Return(nil).Run(func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) {
+ list.(*v1.PodList).Items = chaosPods
+ }).Once()
+
+ By("check if the target exists for each chaos pod")
+ k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{
+ Namespace: DefaultNamespace,
+ Name: DefaultTargetName,
+ }, mock.Anything).Return(&errors.StatusError{ErrStatus: metav1.Status{
+ Message: "Not found",
+ Reason: metav1.StatusReasonNotFound,
+ Code: http.StatusNotFound,
+ }}).Times(3)
+
+ k8sClientMock.EXPECT().Update(mock.Anything, mock.Anything).Return(fmt.Errorf("an error happened")).Times(3)
+ })
+
+ It("should not remove any chaos pod", func() {
+ // Assert
+ By("not return an error")
+ Expect(err).ShouldNot(HaveOccurred())
+
+ By("not delete chaos pods")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Delete")
+ })
+ })
+
+ When("an error occurs while removing the finalizer from a single chaos pod", func() {
+
+ BeforeEach(func() {
+ // Arrange
+ By("list the existing chaos pods matching criteria")
+ k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, &client.ListOptions{
+ Namespace: DefaultChaosNamespace,
+ LabelSelector: labels.SelectorFromValidatedSet(DefaultLs),
+ }).Return(nil).Run(func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) {
+ list.(*v1.PodList).Items = chaosPods
+ }).Once()
+
+ By("check if the target exists for each chaos pod")
+ k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{
+ Namespace: DefaultNamespace,
+ Name: DefaultTargetName,
+ }, mock.Anything).Return(&errors.StatusError{ErrStatus: metav1.Status{
+ Message: "Not found",
+ Reason: metav1.StatusReasonNotFound,
+ Code: http.StatusNotFound,
+ }}).Times(3)
+
+ for i, pod := range chaosPods {
+ podWithoutFinalizer := pod.DeepCopy()
+ controllerutil.RemoveFinalizer(podWithoutFinalizer, chaostypes.ChaosPodFinalizer)
+
+ if i == 1 {
+ By("return an error for the second chaos pod")
+ k8sClientMock.EXPECT().Update(mock.Anything, podWithoutFinalizer).Return(fmt.Errorf("an error occurred")).Once()
+
+ continue
+ }
+
+ By("remove the finalizer of all chaos pods")
+ k8sClientMock.EXPECT().Update(mock.Anything, podWithoutFinalizer).Return(nil).Once()
+
+ By("remove all chaos pods")
+ k8sClientMock.EXPECT().Delete(mock.Anything, podWithoutFinalizer).Return(nil).Once()
+ }
+ })
+
+ It("should remove only the two chaos pods whose finalizer was removed", func() {
+ // Assert
+ Expect(err).ShouldNot(HaveOccurred())
+
+ By("remove only two chaos pods")
+ k8sClientMock.AssertNumberOfCalls(GinkgoT(), "Delete", 2)
+ })
+ })
+
+ When("an error occurs while deleting all chaos pods", func() {
+
+ BeforeEach(func() {
+ // Arrange
+ By("list the existing chaos pods matching criteria")
+ k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, &client.ListOptions{
+ Namespace: DefaultChaosNamespace,
+ LabelSelector: labels.SelectorFromValidatedSet(DefaultLs),
+ }).Return(nil).Run(func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) {
+ list.(*v1.PodList).Items = chaosPods
+ }).Once()
+
+ By("check if the target exists for each chaos pod")
+ k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{
+ Namespace: DefaultNamespace,
+ Name: DefaultTargetName,
+ }, mock.Anything).Return(&errors.StatusError{ErrStatus: metav1.Status{
+ Message: "Not found",
+ Reason: metav1.StatusReasonNotFound,
+ Code: http.StatusNotFound,
+ }}).Times(3)
+
+ k8sClientMock.EXPECT().Update(mock.Anything, mock.Anything).Return(nil).Times(3)
+
+ k8sClientMock.EXPECT().Delete(mock.Anything, mock.Anything).Return(fmt.Errorf("an error occurred")).Times(3)
+ })
+
+ It("should not return an error", func() {
+ // Assert
+ Expect(err).ShouldNot(HaveOccurred())
+ })
+ })
+ })
+
+ Context("with a single chaos pod", func() {
+
+ BeforeEach(func() {
+ // Arrange
+ chaosPods = []v1.Pod{
+ builderstest.NewPodBuilder("test-1", DefaultNamespace).WithChaosFinalizer().WithChaosPodLabels(DefaultDisruptionName, DefaultDisruptionName, DefaultTargetName, chaostypes.DisruptionKindDiskFailure).Build(),
+ }
+ })
+
+ Context("the target still exists", func() {
+
+ BeforeEach(func() {
+ // Arrange
+ By("list the existing chaos pods matching criteria")
+ k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, &client.ListOptions{
+ Namespace: DefaultChaosNamespace,
+ LabelSelector: labels.SelectorFromValidatedSet(DefaultLs),
+ }).Return(nil).Run(func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) {
+ list.(*v1.PodList).Items = chaosPods
+ }).Once()
+
+ By("check if the target exists")
+ k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{
+ Namespace: DefaultNamespace,
+ Name: DefaultTargetName,
+ }, mock.Anything).Return(nil).Once()
+ })
+
+ It("should not remove the non-orphan chaos pod", func() {
+ // Assert
+ Expect(err).ShouldNot(HaveOccurred())
+
+ By("not remove the finalizer")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Update")
+
+ By("not delete the chaos pod")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Delete")
+ })
+ })
+
+ When("the k8s client returns an unexpected error while verifying the target", func() {
+
+ BeforeEach(func() {
+ // Arrange
+ chaosPods := []v1.Pod{
+ builderstest.NewPodBuilder("test-1", DefaultNamespace).WithChaosFinalizer().WithChaosPodLabels(DefaultDisruptionName, DefaultDisruptionName, DefaultTargetName, chaostypes.DisruptionKindDiskFailure).Build(),
+ }
+
+ By("list the existing chaos pods matching criteria")
+ k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, &client.ListOptions{
+ Namespace: DefaultChaosNamespace,
+ LabelSelector: labels.SelectorFromValidatedSet(DefaultLs),
+ }).Return(nil).Run(func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) {
+ list.(*v1.PodList).Items = chaosPods
+ }).Once()
+
+ By("check if the target exists")
+ k8sClientMock.EXPECT().Get(mock.Anything, types.NamespacedName{
+ Namespace: DefaultNamespace,
+ Name: DefaultTargetName,
+ }, mock.Anything).Return(fmt.Errorf("an error happened")).Once()
+ })
+
+ It("should not remove the non-orphan chaos pod", func() {
+ // Assert
+ Expect(err).ShouldNot(HaveOccurred())
+
+ By("not remove the finalizer")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Update")
+
+ By("not delete the chaos pod")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Delete")
+ })
+ })
+ })
+ })
+
+ Describe("error cases", func() {
+
+ When("GetChaosPodsOfDisruption returns an error", func() {
+
+ BeforeEach(func() {
+ // Arrange
+ By("list the existing chaos pods matching criteria")
+ k8sClientMock.EXPECT().List(mock.Anything, mock.Anything, &client.ListOptions{
+ Namespace: DefaultChaosNamespace,
+ LabelSelector: labels.SelectorFromValidatedSet(DefaultLs),
+ }).Return(fmt.Errorf("an error happened")).Once()
+ })
+
+ It("should propagate the error", func() {
+ // Assert
+ Expect(err).Should(HaveOccurred())
+
+ By("not verify the presence of the target")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Get")
+
+ By("not remove the finalizer")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Update")
+
+ By("not delete the chaos pod")
+ k8sClientMock.AssertNotCalled(GinkgoT(), "Delete")
+ })
+ })
+ })
+ })
+})
diff --git a/services/suite_test.go b/services/suite_test.go
new file mode 100644
index 000000000..0f77944eb
--- /dev/null
+++ b/services/suite_test.go
@@ -0,0 +1,29 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2023 Datadog, Inc.
+
+package services_test
+
+import (
+ "testing"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
"github.com/onsi/gomega" + "go.uber.org/zap" + "go.uber.org/zap/zaptest" + // +kubebuilder:scaffold:imports +) + +var logger *zap.SugaredLogger + +var _ = BeforeSuite(func() { + // Arrange + logger = zaptest.NewLogger(GinkgoT()).Sugar() +}) + +func TestAPIs(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Services Suite") +} diff --git a/targetselector/target_selector.go b/targetselector/target_selector.go index 6e3f539bd..8a0ace8ad 100644 --- a/targetselector/target_selector.go +++ b/targetselector/target_selector.go @@ -6,8 +6,12 @@ package targetselector import ( + "fmt" + "regexp" + chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -15,7 +19,22 @@ import ( type TargetSelector interface { // GetMatchingPodsOverTotalPods Returns list of matching ready and untargeted pods and number of total pods GetMatchingPodsOverTotalPods(c client.Client, instance *chaosv1beta1.Disruption) (*corev1.PodList, int, error) - // GetMatchingPodsOverTotalPods Returns list of matching ready and untargeted nodes and number of total nodes + + // GetMatchingNodesOverTotalNodes Returns list of matching ready and untargeted nodes and number of total nodes GetMatchingNodesOverTotalNodes(c client.Client, instance *chaosv1beta1.Disruption) (*corev1.NodeList, int, error) + + // TargetIsHealthy Returns an error if the given target is unhealthy or does not exist TargetIsHealthy(target string, c client.Client, instance *chaosv1beta1.Disruption) error } + +// ValidateLabelSelector assert label selector matches valid grammar, avoids CORE-414 +func ValidateLabelSelector(selector labels.Selector) error { + labelGrammar := "([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]" + rgx := regexp.MustCompile(labelGrammar) + + if !rgx.MatchString(selector.String()) { + return fmt.Errorf("given label selector is invalid, it does not match valid selector grammar: %s %s", selector.String(), labelGrammar) + } + + return nil +} diff --git a/targetselector/target_selector_test.go b/targetselector/target_selector_test.go new file mode 100644 index 000000000..fc6904e62 --- /dev/null +++ b/targetselector/target_selector_test.go @@ -0,0 +1,42 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2023 Datadog, Inc. 
+
+package targetselector_test
+
+import (
+ "github.com/DataDog/chaos-controller/targetselector"
+ "github.com/onsi/ginkgo/v2"
+ "github.com/onsi/gomega"
+ "k8s.io/apimachinery/pkg/labels"
+)
+
+var _ = ginkgo.Describe("Label Selector Validation", func() {
+ ginkgo.Context("validating an empty label selector", func() {
+ ginkgo.It("should fail", func() {
+ selector := labels.Set{}
+ gomega.Expect(targetselector.ValidateLabelSelector(selector.AsSelector())).ToNot(gomega.Succeed())
+ })
+ })
+ ginkgo.Context("validating a good label selector", func() {
+ ginkgo.It("should succeed", func() {
+ selector := labels.Set{"foo": "bar"}
+ gomega.Expect(targetselector.ValidateLabelSelector(selector.AsSelector())).To(gomega.Succeed())
+ })
+ })
+ ginkgo.Context("validating special characters in label selector", func() {
+ ginkgo.It("should succeed", func() {
+ selector := labels.Set{"foo": "”bar”"}
+ // .AsSelector() should strip invalid characters
+ gomega.Expect(targetselector.ValidateLabelSelector(selector.AsSelector())).To(gomega.Succeed())
+ })
+ })
+ ginkgo.Context("validating too many quotes in label selector", func() {
+ ginkgo.It("should succeed", func() {
+ selector := labels.Set{"foo": "\"bar\""}
+ // .AsSelector() should strip invalid characters
+ gomega.Expect(targetselector.ValidateLabelSelector(selector.AsSelector())).To(gomega.Succeed())
+ })
+ })
+})
diff --git a/utils/utils.go b/utils/utils.go
index 96db76447..0459e2469 100644
--- a/utils/utils.go
+++ b/utils/utils.go
@@ -52,6 +52,6 @@ type SetupWebhookWithManagerConfig struct {
 HandlerEnabledFlag bool
 DefaultDurationFlag time.Duration
 ChaosNamespace string
- CloudServicesProvidersManager *cloudservice.CloudServicesProvidersManager
+ CloudServicesProvidersManager cloudservice.CloudServicesProvidersManager
 Environment string
 }
diff --git a/watchers/disruptions_watchers_manager_test.go b/watchers/disruptions_watchers_manager_test.go
index 70601e0bc..3b339edb9 100644
--- a/watchers/disruptions_watchers_manager_test.go
+++ b/watchers/disruptions_watchers_manager_test.go
@@ -156,7 +156,7 @@ var _ = Describe("Disruptions watchers manager", func() {
 It("should return an error", func() {
 // Act
 Expect(err).To(HaveOccurred())
- Expect(err.Error()).Should(Equal("NewDisruptionTargetWatcher error"))
+ Expect(err).To(MatchError("NewDisruptionTargetWatcher error"))
 })
 })
 
@@ -173,7 +173,7 @@
 It("should return an error", func() {
 Expect(err).To(HaveOccurred())
- Expect(err.Error()).Should(Equal("NewChaosPodWatcher error message"))
+ Expect(err).To(MatchError("NewChaosPodWatcher error message"))
 })
 })
 
@@ -189,7 +189,7 @@
 It("should return an error", func() {
 Expect(err).To(HaveOccurred())
- Expect(err.Error()).Should(Equal("failed to create watcher: disruptionTargetWatcher message"))
+ Expect(err).To(MatchError("failed to create watcher: disruptionTargetWatcher message"))
 })
 })
 
@@ -205,7 +205,7 @@
 It("should return an error", func() {
 Expect(err).To(HaveOccurred())
- Expect(err.Error()).Should(Equal("failed to create watcher: chaosPodWatcher error"))
+ Expect(err).To(MatchError("failed to create watcher: chaosPodWatcher error"))
 })
 })
 })
@@ -223,7 +223,7 @@
 // Assert
 Expect(err).Should(HaveOccurred())
- Expect(err.Error()).Should(Equal("the disruption is not valid. It should contain a name and a namespace"))
+ Expect(err).To(MatchError("the disruption is not valid. It should contain a name and a namespace"))
 })
 })
 })
diff --git a/watchers/factory_test.go b/watchers/factory_test.go
index b54afe638..4ea887e65 100644
--- a/watchers/factory_test.go
+++ b/watchers/factory_test.go
@@ -110,7 +110,7 @@ var _ = Describe("Watcher factory", func() {
 It("should return an error", func() {
 Expect(err).Should(HaveOccurred())
- Expect(err.Error()).Should(Equal("the disruption fields name and namespace of the ObjectMeta field are required"))
+ Expect(err).To(MatchError("the disruption fields name and namespace of the ObjectMeta field are required"))
 })
 })
 })
diff --git a/watchers/manager_test.go b/watchers/manager_test.go
index d1e856eb0..3158a6a59 100644
--- a/watchers/manager_test.go
+++ b/watchers/manager_test.go
@@ -229,7 +229,7 @@ var _ = Describe("Manager of watchers", func() {
 It("should return an error", func() {
 Expect(err).Should(HaveOccurred())
- Expect(err.Error()).Should(Equal("the watcher 1 does not exist"))
+ Expect(err).To(MatchError("the watcher 1 does not exist"))
 })
 })
 })
diff --git a/watchers/watcher_test.go b/watchers/watcher_test.go
index fc93d79a0..64d7269e7 100644
--- a/watchers/watcher_test.go
+++ b/watchers/watcher_test.go
@@ -151,7 +151,7 @@ var _ = Describe("watcher", func() {
 // Assert
 Expect(err).Should(HaveOccurred())
- Expect(err.Error()).Should(Equal("error getting informer from cache. Error: get informer error"))
+ Expect(err).To(MatchError("error getting informer from cache. Error: get informer error"))
 })
 })
 
@@ -167,7 +167,7 @@
 err = watcher.Start()
 Expect(err).Should(HaveOccurred())
- Expect(err.Error()).Should(Equal("error adding event handler to the informer. Error: informer error"))
+ Expect(err).To(MatchError("error adding event handler to the informer. Error: informer error"))
 })
 })
 
@@ -285,7 +285,7 @@
 It("should return an error", func() {
 Expect(err).Should(HaveOccurred())
- Expect(err.Error()).Should(Equal("the watcher should be started with its Start method in order to initialise the cache source"))
+ Expect(err).To(MatchError("the watcher should be started with its Start method in order to initialise the cache source"))
 })
 
 It("should return nil", func() {
@@ -309,7 +309,7 @@
 // Assert
 Expect(err).Should(HaveOccurred())
- Expect(err.Error()).Should(Equal("the watcher should be started with its Start method in order to initialize the context tuple"))
+ Expect(err).To(MatchError("the watcher should be started with its Start method in order to initialize the context tuple"))
 })
 })