Skip to content

Commit

Permalink
Fix nil pointer issue when clusterset is deleted in leader (#3915)
Browse files Browse the repository at this point in the history
When the ClusterSet is deleted in leader Namespace, there will be
a nil pointer issue due to data inconsistency. This issue will cause
leader controller crash and restart. Refined codes to make data
consistent.

Signed-off-by: Lan Luo <luola@vmware.com>
  • Loading branch information
luolanzone committed Jun 21, 2022
1 parent c6fae02 commit b2c245c
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (r *LeaderClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
if !errors.IsNotFound(err) {
return ctrl.Result{}, err
}
klog.InfoS("Received ClusterSet delete", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet delete", "clusterset", klog.KObj(clusterSet))
for _, removedMember := range r.clusterSetConfig.Spec.Members {
r.StatusManager.RemoveMember(common.ClusterID(removedMember.ClusterID))
}
Expand All @@ -80,7 +80,7 @@ func (r *LeaderClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, nil
}

klog.InfoS("Received ClusterSet add/update", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet add/update", "clusterset", klog.KObj(clusterSet))

// Handle create or update
// If create, make sure the local ClusterClaim is part of the leader config
Expand Down Expand Up @@ -175,8 +175,8 @@ func (r *LeaderClusterSetReconciler) runBackgroundTasks() {
// statues across all clusters. Message will be empty and Reason
// will be "NoReadyCluster"
func (r *LeaderClusterSetReconciler) updateStatus() {
defer r.mutex.Unlock()
r.mutex.Lock()
defer r.mutex.Unlock()

if r.clusterSetConfig == nil {
// Nothing to do.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (r *MemberClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
if !apierrors.IsNotFound(err) {
return ctrl.Result{}, err
}
klog.InfoS("Received ClusterSet delete", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet delete", "clusterset", klog.KObj(clusterSet))
stopErr := r.remoteCommonAreaManager.Stop()
r.remoteCommonAreaManager = nil
r.clusterSetConfig = nil
Expand All @@ -96,7 +96,7 @@ func (r *MemberClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, stopErr
}

klog.InfoS("Received ClusterSet add/update", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet add/update", "clusterset", klog.KObj(clusterSet))

// Handle create or update
if r.clusterSetConfig == nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ type MemberClusterAnnounceReconciler struct {
}

type MemberClusterStatusManager interface {
AddMember(MemberId common.ClusterID)
RemoveMember(MemberId common.ClusterID)
AddMember(memberID common.ClusterID)
RemoveMember(memberID common.ClusterID)

GetMemberClusterStatuses() []multiclusterv1alpha1.ClusterStatus
}
Expand Down Expand Up @@ -106,8 +106,8 @@ func (r *MemberClusterAnnounceReconciler) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, client.IgnoreNotFound(err)
}

defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

if data, ok := r.timerData[common.ClusterID(memberAnnounce.ClusterID)]; ok {
klog.V(2).InfoS("Reset lastUpdateTime", "Cluster", memberAnnounce.ClusterID)
Expand Down Expand Up @@ -163,20 +163,20 @@ func (r *MemberClusterAnnounceReconciler) SetupWithManager(mgr ctrl.Manager) err
}

func (r *MemberClusterAnnounceReconciler) processMCSStatus() {
defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

for member, data := range r.timerData {
if data.lastUpdateTime.IsZero() {
// Member has never connected to the local cluster, no status update
continue
}
status := r.memberStatus[member]
// Check if the member has connected atleast once in the last 3 intervals.
// Check if the member has connected at least once in the last 3 intervals.
duration := time.Since(data.lastUpdateTime)
klog.V(2).InfoS("Timer processing", "Cluster", member, "duration", duration)
if duration <= ConnectionTimeout {
// Member has updated MemberClusterStatus atleast once in the last 3 intervals.
// Member has updated MemberClusterStatus at least once in the last 3 intervals.
// If last status is not connected, then update the status.
for index := range status.Conditions {
condition := &status.Conditions[index]
Expand Down Expand Up @@ -232,10 +232,10 @@ func (r *MemberClusterAnnounceReconciler) processMCSStatus() {

/******************************* MemberClusterStatusManager methods *******************************/

func (r *MemberClusterAnnounceReconciler) AddMember(MemberId common.ClusterID) {
defer r.mapLock.Unlock()
func (r *MemberClusterAnnounceReconciler) AddMember(memberID common.ClusterID) {
r.mapLock.Lock()
if _, ok := r.memberStatus[MemberId]; ok {
defer r.mapLock.Unlock()
if _, ok := r.memberStatus[memberID]; ok {
// already present
return
}
Expand All @@ -256,30 +256,25 @@ func (r *MemberClusterAnnounceReconciler) AddMember(MemberId common.ClusterID) {
Reason: ReasonNeverConnected,
})

r.memberStatus[MemberId] = &multiclusterv1alpha1.ClusterStatus{ClusterID: string(MemberId),
r.memberStatus[memberID] = &multiclusterv1alpha1.ClusterStatus{ClusterID: string(memberID),
Conditions: conditions}

r.timerData[MemberId] = &timerData{connected: false, lastUpdateTime: time.Time{}}
r.timerData[memberID] = &timerData{connected: false, lastUpdateTime: time.Time{}}
klog.InfoS("Added member", "member", memberID)
}

func (r *MemberClusterAnnounceReconciler) RemoveMember(MemberId common.ClusterID) {
defer r.mapLock.Unlock()
func (r *MemberClusterAnnounceReconciler) RemoveMember(memberID common.ClusterID) {
r.mapLock.Lock()
defer r.mapLock.Unlock()

if _, ok := r.memberStatus[MemberId]; ok {
delete(r.memberStatus, MemberId)
return
}

if _, ok := r.timerData[MemberId]; ok {
delete(r.timerData, MemberId)
return
}
delete(r.memberStatus, memberID)
delete(r.timerData, memberID)
klog.InfoS("Removed member", "member", memberID)
}

func (r *MemberClusterAnnounceReconciler) GetMemberClusterStatuses() []multiclusterv1alpha1.ClusterStatus {
defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

status := make([]multiclusterv1alpha1.ClusterStatus, len(r.memberStatus))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,22 @@ func TestStatusAfterAdd(t *testing.T) {
},
}

//[]multiclusterv1alpha1.ClusterStatus
status := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
assert.Equal(t, 1, len(status))
verifyStatus(t, expectedStatus, status[0])
actualStatus := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
actualTimerData := memberClusterAnnounceReconcilerUnderTest.timerData
assert.Equal(t, 1, len(actualStatus))
assert.Equal(t, 1, len(actualTimerData))
verifyStatus(t, expectedStatus, actualStatus[0])
}

func TestStatusAfterDelete(t *testing.T) {
setup()
memberClusterAnnounceReconcilerUnderTest.AddMember("east")
memberClusterAnnounceReconcilerUnderTest.RemoveMember("east")

actualStatus := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
actualTimerData := memberClusterAnnounceReconcilerUnderTest.timerData
assert.Equal(t, 0, len(actualStatus))
assert.Equal(t, 0, len(actualTimerData))
}

func TestStatusAfterReconcile(t *testing.T) {
Expand Down Expand Up @@ -159,10 +171,10 @@ func TestStatusAfterReconcile(t *testing.T) {
}

memberClusterAnnounceReconcilerUnderTest.processMCSStatus()
status := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
klog.V(2).InfoS("Received", "actual", status, "expected", expectedStatus)
assert.Equal(t, 1, len(status))
verifyStatus(t, expectedStatus, status[0])
actualStatus := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
klog.V(2).InfoS("Received", "actual", actualStatus, "expected", expectedStatus)
assert.Equal(t, 1, len(actualStatus))
verifyStatus(t, expectedStatus, actualStatus[0])
}

func TestStatusAfterLeaderElection(t *testing.T) {
Expand Down Expand Up @@ -201,10 +213,10 @@ func TestStatusAfterLeaderElection(t *testing.T) {
}

memberClusterAnnounceReconcilerUnderTest.processMCSStatus()
status := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
klog.V(2).InfoS("Received", "actual", status, "expected", expectedStatus)
assert.Equal(t, 1, len(status))
verifyStatus(t, expectedStatus, status[0])
actualStatus := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
klog.V(2).InfoS("Received", "actual", actualStatus, "expected", expectedStatus)
assert.Equal(t, 1, len(actualStatus))
verifyStatus(t, expectedStatus, actualStatus[0])
}

func TestStatusInNonLeaderCase(t *testing.T) {
Expand Down Expand Up @@ -243,10 +255,10 @@ func TestStatusInNonLeaderCase(t *testing.T) {
}

memberClusterAnnounceReconcilerUnderTest.processMCSStatus()
status := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
klog.V(2).InfoS("Received", "actual", status, "expected", expectedStatus)
assert.Equal(t, 1, len(status))
verifyStatus(t, expectedStatus, status[0])
actualStatus := memberClusterAnnounceReconcilerUnderTest.GetMemberClusterStatuses()
klog.V(2).InfoS("Received", "actual", actualStatus, "expected", expectedStatus)
assert.Equal(t, 1, len(actualStatus))
verifyStatus(t, expectedStatus, actualStatus[0])
}

func verifyStatus(t *testing.T, expected mcsv1alpha1.ClusterStatus, actual mcsv1alpha1.ClusterStatus) {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b2c245c

Please sign in to comment.