Skip to content

Commit

Permalink
Fix nil pointer issue when clusterset is deleted in leader
Browse files Browse the repository at this point in the history
When the ClusterSet is deleted in leader Namespace, there will be
a nil pointer issue due to data inconsistency. This issue will cause
leader controller crash and restart. Refined codes to make data
consistent.

Signed-off-by: Lan Luo <luola@vmware.com>
  • Loading branch information
luolanzone committed Jun 21, 2022
1 parent 52bb02e commit d5895e8
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (r *LeaderClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
if !errors.IsNotFound(err) {
return ctrl.Result{}, err
}
klog.InfoS("Received ClusterSet delete", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet delete", "clusterset", klog.KObj(clusterSet))
for _, removedMember := range r.clusterSetConfig.Spec.Members {
r.StatusManager.RemoveMember(common.ClusterID(removedMember.ClusterID))
}
Expand All @@ -80,7 +80,7 @@ func (r *LeaderClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, nil
}

klog.InfoS("Received ClusterSet add/update", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet add/update", "clusterset", klog.KObj(clusterSet))

// Handle create or update
// If create, make sure the local ClusterClaim is part of the leader config
Expand Down Expand Up @@ -175,8 +175,8 @@ func (r *LeaderClusterSetReconciler) runBackgroundTasks() {
// statues across all clusters. Message will be empty and Reason
// will be "NoReadyCluster"
func (r *LeaderClusterSetReconciler) updateStatus() {
defer r.mutex.Unlock()
r.mutex.Lock()
defer r.mutex.Unlock()

if r.clusterSetConfig == nil {
// Nothing to do.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (r *MemberClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
if !apierrors.IsNotFound(err) {
return ctrl.Result{}, err
}
klog.InfoS("Received ClusterSet delete", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet delete", "clusterset", klog.KObj(clusterSet))
stopErr := r.remoteCommonAreaManager.Stop()
r.remoteCommonAreaManager = nil
r.clusterSetConfig = nil
Expand All @@ -96,7 +96,7 @@ func (r *MemberClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, stopErr
}

klog.InfoS("Received ClusterSet add/update", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet add/update", "clusterset", klog.KObj(clusterSet))

// Handle create or update
if r.clusterSetConfig == nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ type MemberClusterAnnounceReconciler struct {
}

type MemberClusterStatusManager interface {
AddMember(MemberId common.ClusterID)
RemoveMember(MemberId common.ClusterID)
AddMember(memberID common.ClusterID)
RemoveMember(memberID common.ClusterID)

GetMemberClusterStatuses() []multiclusterv1alpha1.ClusterStatus
}
Expand Down Expand Up @@ -106,8 +106,8 @@ func (r *MemberClusterAnnounceReconciler) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, client.IgnoreNotFound(err)
}

defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

if data, ok := r.timerData[common.ClusterID(memberAnnounce.ClusterID)]; ok {
klog.V(2).InfoS("Reset lastUpdateTime", "Cluster", memberAnnounce.ClusterID)
Expand Down Expand Up @@ -163,20 +163,20 @@ func (r *MemberClusterAnnounceReconciler) SetupWithManager(mgr ctrl.Manager) err
}

func (r *MemberClusterAnnounceReconciler) processMCSStatus() {
defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

for member, data := range r.timerData {
if data.lastUpdateTime.IsZero() {
// Member has never connected to the local cluster, no status update
continue
}
status := r.memberStatus[member]
// Check if the member has connected atleast once in the last 3 intervals.
// Check if the member has connected at least once in the last 3 intervals.
duration := time.Since(data.lastUpdateTime)
klog.V(2).InfoS("Timer processing", "Cluster", member, "duration", duration)
if duration <= ConnectionTimeout {
// Member has updated MemberClusterStatus atleast once in the last 3 intervals.
// Member has updated MemberClusterStatus at least once in the last 3 intervals.
// If last status is not connected, then update the status.
for index := range status.Conditions {
condition := &status.Conditions[index]
Expand Down Expand Up @@ -232,10 +232,10 @@ func (r *MemberClusterAnnounceReconciler) processMCSStatus() {

/******************************* MemberClusterStatusManager methods *******************************/

func (r *MemberClusterAnnounceReconciler) AddMember(MemberId common.ClusterID) {
defer r.mapLock.Unlock()
func (r *MemberClusterAnnounceReconciler) AddMember(memberID common.ClusterID) {
r.mapLock.Lock()
if _, ok := r.memberStatus[MemberId]; ok {
defer r.mapLock.Unlock()
if _, ok := r.memberStatus[memberID]; ok {
// already present
return
}
Expand All @@ -256,30 +256,25 @@ func (r *MemberClusterAnnounceReconciler) AddMember(MemberId common.ClusterID) {
Reason: ReasonNeverConnected,
})

r.memberStatus[MemberId] = &multiclusterv1alpha1.ClusterStatus{ClusterID: string(MemberId),
r.memberStatus[memberID] = &multiclusterv1alpha1.ClusterStatus{ClusterID: string(memberID),
Conditions: conditions}

r.timerData[MemberId] = &timerData{connected: false, lastUpdateTime: time.Time{}}
r.timerData[memberID] = &timerData{connected: false, lastUpdateTime: time.Time{}}
klog.InfoS("Added member", "member", memberID)
}

func (r *MemberClusterAnnounceReconciler) RemoveMember(MemberId common.ClusterID) {
defer r.mapLock.Unlock()
func (r *MemberClusterAnnounceReconciler) RemoveMember(memberID common.ClusterID) {
r.mapLock.Lock()
defer r.mapLock.Unlock()

if _, ok := r.memberStatus[MemberId]; ok {
delete(r.memberStatus, MemberId)
return
}

if _, ok := r.timerData[MemberId]; ok {
delete(r.timerData, MemberId)
return
}
delete(r.memberStatus, memberID)
delete(r.timerData, memberID)
klog.InfoS("Removed member", "member", memberID)
}

func (r *MemberClusterAnnounceReconciler) GetMemberClusterStatuses() []multiclusterv1alpha1.ClusterStatus {
defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

status := make([]multiclusterv1alpha1.ClusterStatus, len(r.memberStatus))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,24 @@ func TestStatusAfterAdd(t *testing.T) {
},
}

//[]multiclusterv1alpha1.ClusterStatus
status := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
expectedTimeData := memberClusterAnnounceReconcilerUnderTest.timerData
assert.Equal(t, 1, len(status))
assert.Equal(t, 1, len(expectedTimeData))
verifyStatus(t, expectedStatus, status[0])
}

func TestStatusAfterDelete(t *testing.T) {
setup()
memberClusterAnnounceReconcilerUnderTest.AddMember("east")
memberClusterAnnounceReconcilerUnderTest.RemoveMember("east")

status := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
expectedTimeData := memberClusterAnnounceReconcilerUnderTest.timerData
assert.Equal(t, 0, len(status))
assert.Equal(t, 0, len(expectedTimeData))
}

func TestStatusAfterReconcile(t *testing.T) {
TestStatusAfterAdd(t)

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d5895e8

Please sign in to comment.