Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #752: InjectionStatus should default to NotInjected. Notify users when we can't find a target container. #758

Merged
merged 7 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion api/v1beta1/disruption_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ type DisruptionStatus struct {
IsInjected bool `json:"isInjected,omitempty"`
// +kubebuilder:validation:Enum=NotInjected;PartiallyInjected;PausedPartiallyInjected;Injected;PausedInjected;PreviouslyNotInjected;PreviouslyPartiallyInjected;PreviouslyInjected
// +ddmark:validation:Enum=NotInjected;PartiallyInjected;PausedPartiallyInjected;Injected;PausedInjected;PreviouslyNotInjected;PreviouslyPartiallyInjected;PreviouslyInjected
// +kubebuilder:default=NotInjected
InjectionStatus chaostypes.DisruptionInjectionStatus `json:"injectionStatus,omitempty"`
// +nullable
TargetInjections TargetInjections `json:"targetInjections,omitempty"`
Expand Down Expand Up @@ -630,7 +631,7 @@ func TargetedContainers(pod corev1.Pod, containerNames []string) (map[string]str
if containerID, existsInPod := allContainers[containerName]; existsInPod {
targetedContainers[containerName] = containerID
} else {
return nil, fmt.Errorf("could not find specified container in pod (pod: %s, target: %s)", pod.ObjectMeta.Name, containerName)
return nil, fmt.Errorf("could not find specified container in pod (pod: %s, targetContainer: %s)", pod.ObjectMeta.Name, containerName)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,7 @@ spec:
description: Number of targets with a chaos pod ready
type: integer
injectionStatus:
default: NotInjected
description: DisruptionInjectionStatus represents the injection status of a disruption
enum:
- NotInjected
Expand Down
32 changes: 7 additions & 25 deletions controllers/disruption_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import (
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change came about while testing, happy to undo it, but I think it helps make it clear in the code that we aren't importing the std lib's errors package

"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -128,23 +128,6 @@ func (r *DisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return
}

unwrappedError, ok := err.(chaostypes.DisruptionError)
if ok {
errorContext := unwrappedError.Context()
wraps := make([]interface{}, 0, len(errorContext)+2)
wraps = append(wraps, "err", unwrappedError)

for key, value := range errorContext {
wraps = append(wraps, key, value)
}

if isModifiedError(unwrappedError) {
r.log.Infow("a retryable error occurred in reconcile loop", wraps...)
} else {
r.log.Errorw("an error occurred in reconcile loop", wraps...)
}
}

if isModifiedError(err) {
r.log.Infow("a retryable error occurred in reconcile loop", "error", err)
} else {
Expand Down Expand Up @@ -519,7 +502,7 @@ func (r *DisruptionReconciler) startInjection(instance *chaosv1beta1.Disruption)
}

if err = r.createChaosPods(instance, targetName); err != nil {
if !errors.IsNotFound(err) {
if !apierrors.IsNotFound(err) {
return fmt.Errorf("error creating chaos pods: %w", err)
}

Expand Down Expand Up @@ -555,10 +538,9 @@ func (r *DisruptionReconciler) createChaosPods(instance *chaosv1beta1.Disruption
// get IDs of targeted containers or all containers
targetContainers, err = chaosv1beta1.TargetedContainers(pod, instance.Spec.Containers)
if err != nil {
dErr := chaostypes.DisruptionError{Err: fmt.Errorf("error getting target pod container ID: %w", err)}
dErr.AddContext("targetPodStatus", pod.Status.String())
dErr.AddContext("targetPodName", target)
dErr.AddContext("targetPodNamespace", instance.Namespace)
dErr := fmt.Errorf("error getting target pod's container ID: %w", err)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This err already contains the target pod's name and container name. The namespace is already contained in the disruption logs. Additionally, the Status.String() was not printing something as simple as "Running" or "Completed", but an enormous struct, and thus not useful


r.recordEventOnDisruption(instance, chaosv1beta1.EventInvalidSpecDisruption, dErr.Error(), pod.Name)

return dErr
}
Expand Down Expand Up @@ -714,7 +696,7 @@ func (r *DisruptionReconciler) handleOrphanedChaosPods(req ctrl.Request) error {
r.log.Infow("checking if we can clean up orphaned chaos pod", "chaosPod", chaosPod.Name, "target", target)

// if target doesn't exist, we can try to clean up the chaos pod
if err := r.Client.Get(context.Background(), types.NamespacedName{Name: target, Namespace: req.Namespace}, &p); errors.IsNotFound(err) {
if err := r.Client.Get(context.Background(), types.NamespacedName{Name: target, Namespace: req.Namespace}, &p); apierrors.IsNotFound(err) {
r.log.Warnw("orphaned chaos pod detected, will attempt to delete", "chaosPod", chaosPod.Name)
controllerutil.RemoveFinalizer(&chaosPod, chaostypes.ChaosPodFinalizer)

Expand Down Expand Up @@ -786,7 +768,7 @@ func (r *DisruptionReconciler) handleChaosPodTermination(instance *chaosv1beta1.
// ignore it if it is not ready anymore
err := r.TargetSelector.TargetIsHealthy(target, r.Client, instance)
if err != nil {
if errors.IsNotFound(err) || strings.ToLower(err.Error()) == "pod is not running" || strings.ToLower(err.Error()) == "node is not ready" {
if apierrors.IsNotFound(err) || strings.ToLower(err.Error()) == "pod is not running" || strings.ToLower(err.Error()) == "node is not ready" {
// if the target is not in a good shape, we still run the cleanup phase but we don't check for any issues happening during
// the cleanup to avoid blocking the disruption deletion for nothing
r.log.Infow("target is not likely to be cleaned (either it does not exist anymore or it is not ready), the injector will TRY to clean it but will not take care about any failures", "target", target)
Expand Down
31 changes: 0 additions & 31 deletions types/disruption_error.go

This file was deleted.