Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(trafficrouting): Fix downtime on initial deployment using Istio DestinationRule Subsets. Fixes #2507 #3602

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions rollout/trafficrouting.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ func (c *Controller) NewTrafficRoutingReconciler(roCtx *rolloutContext) ([]traff
}
if rollout.Spec.Strategy.Canary.TrafficRouting.Istio != nil {
if c.IstioController.VirtualServiceInformer.HasSynced() {
trafficReconcilers = append(trafficReconcilers, istio.NewReconciler(rollout, c.IstioController.DynamicClientSet, c.recorder, c.IstioController.VirtualServiceLister, c.IstioController.DestinationRuleLister))
trafficReconcilers = append(trafficReconcilers, istio.NewReconciler(rollout, c.IstioController.DynamicClientSet, c.recorder, c.IstioController.VirtualServiceLister, c.IstioController.DestinationRuleLister, roCtx.allRSs))
} else {
trafficReconcilers = append(trafficReconcilers, istio.NewReconciler(rollout, c.IstioController.DynamicClientSet, c.recorder, nil, nil))
trafficReconcilers = append(trafficReconcilers, istio.NewReconciler(rollout, c.IstioController.DynamicClientSet, c.recorder, nil, nil, roCtx.allRSs))
}
}
if rollout.Spec.Strategy.Canary.TrafficRouting.Nginx != nil {
Expand Down
20 changes: 18 additions & 2 deletions rollout/trafficrouting/istio/istio.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ import (
"fmt"
"strings"

replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
"github.com/mitchellh/mapstructure"
appsv1 "k8s.io/api/apps/v1"

"github.com/argoproj/argo-rollouts/rollout/trafficrouting"

jsonpatch "github.com/evanphx/json-patch/v5"
"github.com/mitchellh/mapstructure"
log "github.com/sirupsen/logrus"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -32,7 +35,7 @@ const Type = "Istio"
const SpecHttpNotFound = "spec.http not found"

// NewReconciler returns a reconciler struct that brings the Virtual Service into the desired state
func NewReconciler(r *v1alpha1.Rollout, client dynamic.Interface, recorder record.EventRecorder, virtualServiceLister, destinationRuleLister dynamiclister.Lister) *Reconciler {
func NewReconciler(r *v1alpha1.Rollout, client dynamic.Interface, recorder record.EventRecorder, virtualServiceLister, destinationRuleLister dynamiclister.Lister, replicaSets []*appsv1.ReplicaSet) *Reconciler {
return &Reconciler{
rollout: r,
log: logutil.WithRollout(r),
Expand All @@ -41,6 +44,7 @@ func NewReconciler(r *v1alpha1.Rollout, client dynamic.Interface, recorder recor
recorder: recorder,
virtualServiceLister: virtualServiceLister,
destinationRuleLister: destinationRuleLister,
replicaSets: replicaSets,
}
}

Expand All @@ -52,6 +56,7 @@ type Reconciler struct {
recorder record.EventRecorder
virtualServiceLister dynamiclister.Lister
destinationRuleLister dynamiclister.Lister
replicaSets []*appsv1.ReplicaSet
}

type virtualServicePatch struct {
Expand Down Expand Up @@ -315,6 +320,17 @@ func (r *Reconciler) reconcileVirtualService(obj *unstructured.Unstructured, vsv
}

func (r *Reconciler) UpdateHash(canaryHash, stableHash string, additionalDestinations ...v1alpha1.WeightDestination) error {
// We need to check if the replicasets are ready here as well if we didn't define any services in the rollout
// See: https://github.com/argoproj/argo-rollouts/issues/2507
if r.rollout.Spec.Strategy.Canary.CanaryService == "" && r.rollout.Spec.Strategy.Canary.StableService == "" {
Comment on lines +323 to +325

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know for sure this is being properly checked when services are defined? In the example in #3681 I am defining both a canary service and stable service but still experiencing a brief downtime/outage after the rollout ends.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aaah, if you explicitly define the services, the existing code should actually be working - the fix I built here is explicitly for when you are using the Istio DestionationRule-method to update your traffic (https://argo-rollouts.readthedocs.io/en/stable/features/traffic-management/istio/#subset-level-traffic-splitting to be precise).

If you define the services (I did not see those in your example, so that's why I thought it might be related to this), there are other codepaths that actually check service-readiness. There might be a big/issue in there, but then your issue is at least definitely NOT related to mine.


for _, rs := range r.replicaSets {
if *rs.Spec.Replicas > 0 && !replicasetutil.IsReplicaSetAvailable(rs) {
return fmt.Errorf("delaying destination rule switch: ReplicaSet %s not fully available", rs.Name)
}
}
}

dRuleSpec := r.rollout.Spec.Strategy.Canary.TrafficRouting.Istio.DestinationRule
if dRuleSpec == nil {
return nil
Expand Down
Loading
Loading