Skip to content

Commit

Permalink
Use permanent errors in the taskrun reconciler
Browse files Browse the repository at this point in the history
Instead of returning nil on error during reconcile, if the error
is transient return it, so that the key is requeue. If the error
is permanent return a permanent error, so that the key is not
requeued.
  • Loading branch information
afrittoli committed Jun 10, 2020
1 parent 3c1c49d commit 81a64c2
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 37 deletions.
47 changes: 27 additions & 20 deletions pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"knative.dev/pkg/apis"
"knative.dev/pkg/configmap"
"knative.dev/pkg/controller"
pkgreconciler "knative.dev/pkg/reconciler"
"knative.dev/pkg/tracker"
)
Expand Down Expand Up @@ -179,7 +180,7 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1beta1.TaskRun) pkg
c.Logger.Errorf("TaskRun prepare error: %v", err.Error())
// We only return an error if update failed, otherwise we don't want to
// reconcile an invalid TaskRun anymore
return c.finishReconcileUpdateEmitEvents(tr, nil, nil)
return c.finishReconcileUpdateEmitEvents(tr, nil, err)
}

// Store the condition before reconcile
Expand All @@ -200,6 +201,9 @@ func (c *Reconciler) finishReconcileUpdateEmitEvents(tr *v1beta1.TaskRun, before

_, err := c.updateLabelsAndAnnotations(tr)
events.EmitError(c.Recorder, err, tr)
if controller.IsPermanentError(previousError) {
return controller.NewPermanentError(multierror.Append(previousError, err))
}
return multierror.Append(previousError, err).ErrorOrNil()
}

Expand Down Expand Up @@ -239,7 +243,7 @@ func (c *Reconciler) prepare(ctx context.Context, tr *v1beta1.TaskRun) (*v1beta1
}
c.Logger.Errorf("Failed to determine Task spec to use for taskrun %s: %v", tr.Name, err)
tr.Status.MarkResourceFailed(podconvert.ReasonFailedResolution, err)
return nil, nil, err
return nil, nil, controller.NewPermanentError(err)
}

// Store the fetched TaskSpec on the TaskRun for auditing
Expand Down Expand Up @@ -279,19 +283,19 @@ func (c *Reconciler) prepare(ctx context.Context, tr *v1beta1.TaskRun) (*v1beta1
if err != nil {
c.Logger.Errorf("Failed to resolve references for taskrun %s: %v", tr.Name, err)
tr.Status.MarkResourceFailed(podconvert.ReasonFailedResolution, err)
return nil, nil, err
return nil, nil, controller.NewPermanentError(err)
}

if err := ValidateResolvedTaskResources(tr.Spec.Params, rtr); err != nil {
c.Logger.Errorf("TaskRun %q resources are invalid: %v", tr.Name, err)
tr.Status.MarkResourceFailed(podconvert.ReasonFailedValidation, err)
return nil, nil, err
return nil, nil, controller.NewPermanentError(err)
}

if err := workspace.ValidateBindings(taskSpec.Workspaces, tr.Spec.Workspaces); err != nil {
c.Logger.Errorf("TaskRun %q workspaces are invalid: %v", tr.Name, err)
tr.Status.MarkResourceFailed(podconvert.ReasonFailedValidation, err)
return nil, nil, err
return nil, nil, controller.NewPermanentError(err)
}

// Initialize the cloud events if at least a CloudEventResource is defined
Expand Down Expand Up @@ -353,7 +357,7 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1beta1.TaskRun,
tr.Status.MarkResourceFailed(volumeclaim.ReasonCouldntCreateWorkspacePVC,
fmt.Errorf("Failed to create PVC for TaskRun %s workspaces correctly: %s",
fmt.Sprintf("%s/%s", tr.Namespace, tr.Name), err))
return nil
return controller.NewPermanentError(err)
}

taskRunWorkspaces := applyVolumeClaimTemplates(tr.Spec.Workspaces, tr.GetOwnerReference())
Expand All @@ -363,8 +367,9 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1beta1.TaskRun,

pod, err = c.createPod(ctx, tr, rtr)
if err != nil {
c.handlePodCreationError(tr, err)
return nil
newErr := c.handlePodCreationError(tr, err)
c.Logger.Error("Failed to create task run pod for task %q: %v", tr.Name, newErr)
return newErr
}
go c.timeoutHandler.WaitTaskRun(tr, tr.Status.StartTime)
}
Expand Down Expand Up @@ -415,7 +420,7 @@ func (c *Reconciler) updateLabelsAndAnnotations(tr *v1beta1.TaskRun) (*v1beta1.T
return newTr, nil
}

func (c *Reconciler) handlePodCreationError(tr *v1beta1.TaskRun, err error) {
func (c *Reconciler) handlePodCreationError(tr *v1beta1.TaskRun, err error) error {
var msg string
if isExceededResourceQuotaError(err) {
backoff, currentlyBackingOff := c.timeoutHandler.GetBackoff(tr)
Expand All @@ -429,19 +434,21 @@ func (c *Reconciler) handlePodCreationError(tr *v1beta1.TaskRun, err error) {
Reason: podconvert.ReasonExceededResourceQuota,
Message: fmt.Sprintf("%s: %v", msg, err),
})
// return a transient error, so that the key is requeued
return err
}
// The pod creation failed, not because of quota issues. The most likely
// reason is that something is wrong with the spec of the Task, that we could
// not check with validation before - i.e. pod template fields
msg = fmt.Sprintf("failed to create task run pod %q: %v. Maybe ", tr.Name, err)
if tr.Spec.TaskRef != nil {
msg += fmt.Sprintf("missing or invalid Task %s/%s", tr.Namespace, tr.Spec.TaskRef.Name)
} else {
// The pod creation failed, not because of quota issues. The most likely
// reason is that something is wrong with the spec of the Task, that we could
// not check with validation before - i.e. pod template fields
msg = fmt.Sprintf("failed to create task run pod %q: %v. Maybe ", tr.Name, err)
if tr.Spec.TaskRef != nil {
msg += fmt.Sprintf("missing or invalid Task %s/%s", tr.Namespace, tr.Spec.TaskRef.Name)
} else {
msg += "invalid TaskSpec"
}
tr.Status.MarkResourceFailed(podconvert.ReasonCouldntGetTask, errors.New(msg))
msg += "invalid TaskSpec"
}
c.Logger.Error("Failed to create task run pod for task %q: %v", tr.Name, err)
newErr := controller.NewPermanentError(errors.New(msg))
tr.Status.MarkResourceFailed(podconvert.ReasonCouldntGetTask, newErr)
return newErr
}

// failTaskRun stops a TaskRun with the provided Reason
Expand Down
75 changes: 58 additions & 17 deletions pkg/reconciler/taskrun/taskrun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1326,6 +1326,17 @@ func TestReconcile_SetsStartTime(t *testing.T) {
}
testAssets, cancel := getTaskRunController(t, d)
defer cancel()
clients := testAssets.Clients

t.Logf("Creating SA %s in %s", "default", "foo")
if _, err := clients.Kube.CoreV1().ServiceAccounts("foo").Create(&corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: "default",
Namespace: "foo",
},
}); err != nil {
t.Fatal(err)
}

if err := testAssets.Controller.Reconciler.Reconcile(context.Background(), getRunName(taskRun)); err != nil {
t.Errorf("expected no error reconciling valid TaskRun but got %v", err)
Expand Down Expand Up @@ -1476,16 +1487,18 @@ func TestReconcileInvalidTaskRuns(t *testing.T) {
taskRun: noTaskRun,
reason: podconvert.ReasonFailedResolution,
wantEvents: []string{
"Normal Started ",
"Warning Failed ",
"Normal Started",
"Warning Failed",
"Warning InternalError",
},
}, {
name: "task run with wrong ref",
taskRun: withWrongRef,
reason: podconvert.ReasonFailedResolution,
wantEvents: []string{
"Normal Started ",
"Warning Failed ",
"Normal Started",
"Warning Failed",
"Warning InternalError",
},
}}

Expand All @@ -1495,13 +1508,16 @@ func TestReconcileInvalidTaskRuns(t *testing.T) {
defer cancel()
c := testAssets.Controller
clients := testAssets.Clients
err := c.Reconciler.Reconcile(context.Background(), getRunName(tc.taskRun))
reconcileErr := c.Reconciler.Reconcile(context.Background(), getRunName(tc.taskRun))

// When a TaskRun is invalid and can't run, we don't want to return an error because
// an error will tell the Reconciler to keep trying to reconcile; instead we want to stop
// When a TaskRun is invalid and can't run, we return a permanent error because
// a regular error will tell the Reconciler to keep trying to reconcile; instead we want to stop
// and forget about the Run.
if err != nil {
t.Errorf("Did not expect to see error when reconciling invalid TaskRun but saw %q", err)
if reconcileErr == nil {
t.Fatalf("Expected to see error when reconciling invalid TaskRun but none")
}
if !controller.IsPermanentError(reconcileErr) {
t.Fatalf("Expected to see a permanet error when reconciling invalid TaskRun, got %s instead", reconcileErr)
}

// Check actions and events
Expand All @@ -1510,7 +1526,7 @@ func TestReconcileInvalidTaskRuns(t *testing.T) {
t.Errorf("expected 3 actions (first: list namespaces) created by the reconciler, got %d. Actions: %#v", len(actions), actions)
}

err = checkEvents(testAssets.Recorder, tc.name, tc.wantEvents)
err := checkEvents(testAssets.Recorder, tc.name, tc.wantEvents)
if !(err == nil) {
t.Errorf(err.Error())
}
Expand Down Expand Up @@ -2510,8 +2526,12 @@ func TestReconcileWorkspaceMissing(t *testing.T) {
defer cancel()
clients := testAssets.Clients

if err := testAssets.Controller.Reconciler.Reconcile(context.Background(), getRunName(taskRun)); err != nil {
t.Errorf("expected no error reconciling valid TaskRun but got %v", err)
err := testAssets.Controller.Reconciler.Reconcile(context.Background(), getRunName(taskRun))
if err == nil {
t.Fatalf("expected error reconciling invalid TaskRun but got none")
}
if !controller.IsPermanentError(err) {
t.Fatalf("Expected to see a permanet error when reconciling invalid TaskRun, got %s instead", err)
}

tr, err := clients.Pipeline.TektonV1beta1().TaskRuns(taskRun.Namespace).Get(taskRun.Name, metav1.GetOptions{})
Expand All @@ -2526,7 +2546,7 @@ func TestReconcileWorkspaceMissing(t *testing.T) {
}
}
if !failedCorrectly {
t.Errorf("Expected TaskRun to fail validation but it did not. Final conditions were:\n%#v", tr.Status.Conditions)
t.Fatalf("Expected TaskRun to fail validation but it did not. Final conditions were:\n%#v", tr.Status.Conditions)
}
}

Expand Down Expand Up @@ -2559,7 +2579,8 @@ func TestReconcileTaskResourceResolutionAndValidation(t *testing.T) {
wantFailedReason: podconvert.ReasonFailedResolution,
wantEvents: []string{
"Normal Started ",
"Warning Failed",
"Warning Failed", // Event about the TaskRun state changed
"Warning InternalError", // Event about the error (generated by the genreconciler)
},
}, {
desc: "Fail ValidateResolvedTaskResources",
Expand All @@ -2581,7 +2602,8 @@ func TestReconcileTaskResourceResolutionAndValidation(t *testing.T) {
wantFailedReason: podconvert.ReasonFailedValidation,
wantEvents: []string{
"Normal Started ",
"Warning Failed",
"Warning Failed", // Event about the TaskRun state changed
"Warning InternalError", // Event about the error (generated by the genreconciler)
},
}} {
t.Run(tt.desc, func(t *testing.T) {
Expand All @@ -2591,8 +2613,16 @@ func TestReconcileTaskResourceResolutionAndValidation(t *testing.T) {
clients := testAssets.Clients
c := testAssets.Controller

if err := c.Reconciler.Reconcile(context.Background(), getRunName(tt.d.TaskRuns[0])); err != nil {
t.Errorf("expected no error reconciling valid TaskRun but got %v", err)
reconcileErr := c.Reconciler.Reconcile(context.Background(), getRunName(tt.d.TaskRuns[0]))

// When a TaskRun is invalid and can't run, we return a permanent error because
// a regular error will tell the Reconciler to keep trying to reconcile; instead we want to stop
// and forget about the Run.
if reconcileErr == nil {
t.Fatalf("Expected to see error when reconciling invalid TaskRun but none")
}
if !controller.IsPermanentError(reconcileErr) {
t.Fatalf("Expected to see a permanet error when reconciling invalid TaskRun, got %s instead", reconcileErr)
}

tr, err := clients.Pipeline.TektonV1beta1().TaskRuns(tt.d.TaskRuns[0].Namespace).Get(tt.d.TaskRuns[0].Name, metav1.GetOptions{})
Expand Down Expand Up @@ -2622,6 +2652,7 @@ func TestReconcileWorkspaceWithVolumeClaimTemplate(t *testing.T) {
taskWithWorkspace := tb.Task("test-task-with-workspace", tb.TaskNamespace("foo"),
tb.TaskSpec(
tb.TaskWorkspace(workspaceName, "a test task workspace", "", true),
tb.Step("foo", tb.StepName("simple-step"), tb.StepCommand("/mycmd")),
))
taskRun := tb.TaskRun("test-taskrun-missing-workspace", tb.TaskRunNamespace("foo"), tb.TaskRunSpec(
tb.TaskRunTaskRef(taskWithWorkspace.Name, tb.TaskRefAPIVersion("a1")),
Expand All @@ -2643,6 +2674,16 @@ func TestReconcileWorkspaceWithVolumeClaimTemplate(t *testing.T) {
defer cancel()
clients := testAssets.Clients

t.Logf("Creating SA %s in %s", "default", "foo")
if _, err := clients.Kube.CoreV1().ServiceAccounts("foo").Create(&corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: "default",
Namespace: "foo",
},
}); err != nil {
t.Fatal(err)
}

if err := testAssets.Controller.Reconciler.Reconcile(context.Background(), getRunName(taskRun)); err != nil {
t.Errorf("expected no error reconciling valid TaskRun but got %v", err)
}
Expand Down

0 comments on commit 81a64c2

Please sign in to comment.