Skip to content

Commit

Permalink
Fix nil pointer issue when clusterset is deleted
Browse files Browse the repository at this point in the history
1. When the ClusterSet is deleted in leader Namespace, there will be
   a nil pointer issue due to data racing like below, this will cause
leader controller crash and restart. Add a new CleanupMembers function
to remove all members when the ClusterSet is deleted.
```
leader_clusterset_controller.go:72] "Received ClusterSet delete" config=""
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x18 pc=0x188b9b1]

goroutine 274 [running]:
antrea.io/antrea/multicluster/controllers/multicluster.(*MemberClusterAnnounceReconciler).processMCSStatus(0xc00052ff20)
	/antrea/multicluster/controllers/multicluster/memberclusterannounce_controller.go:181 +0x331
```
2. Update the license format in auto-generated deepcopy file and refine
   related logic as others. Let it to be updated by `make codegen`
3. Update contrller-gen version to v0.9.0
4. Update controller-runtime version to v0.12.1

Signed-off-by: Lan Luo <luola@vmware.com>
  • Loading branch information
luolanzone committed Jun 21, 2022
1 parent 52bb02e commit 67636f7
Show file tree
Hide file tree
Showing 11 changed files with 91 additions and 110 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ require (
github.com/miekg/dns v1.1.43
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.17.0
github.com/onsi/gomega v1.18.1
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.12.1
github.com/prometheus/common v0.32.1
Expand Down Expand Up @@ -74,7 +74,7 @@ require (
k8s.io/kubectl v0.24.0
k8s.io/kubelet v0.24.0
k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9
sigs.k8s.io/controller-runtime v0.11.2
sigs.k8s.io/controller-runtime v0.12.1
sigs.k8s.io/mcs-api v0.1.0
)

Expand Down
35 changes: 7 additions & 28 deletions go.sum

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions multicluster/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ SHELL = /usr/bin/env bash -o pipefail
GO_VERSION := $(shell head -n 1 ../build/images/deps/go-version)
DOCKER_BUILD_ARGS = --build-arg GO_VERSION=$(GO_VERSION)

YEAR :=$(shell date "+%Y")
.PHONY: all
all: build

Expand Down Expand Up @@ -53,7 +54,7 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
$(CURDIR)/hack/generate-manifest.sh -m > build/yamls/antrea-multicluster-member.yml

generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt",year=$(YEAR) paths="./..."

fmt: ## Run go fmt against code.
go fmt ./...
Expand Down Expand Up @@ -82,7 +83,7 @@ else
endif
##@ Build

build: generate fmt vet ## Build manager binary.
build: fmt vet ## Build manager binary.
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o bin/antrea-mc-controller antrea.io/antrea/multicluster/cmd/...

run: manifests generate fmt vet ## Run a controller from your host.
Expand All @@ -92,7 +93,7 @@ run: manifests generate fmt vet ## Run a controller from your host.

CONTROLLER_GEN = $(shell pwd)/bin/controller-gen
controller-gen: ## Download controller-gen locally if necessary.
$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.4.1)
$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/controller-gen@v0.9.0)

codegen:
./hack/update-codegen.sh
Expand All @@ -106,7 +107,7 @@ TMP_DIR=$$(mktemp -d) ;\
cd $$TMP_DIR ;\
go mod init tmp ;\
echo "Downloading $(2)" ;\
GOBIN=$(PROJECT_DIR)/bin go get $(2) ;\
GOBIN=$(PROJECT_DIR)/bin go install $(2) ;\
rm -rf $$TMP_DIR ;\
}
endef
28 changes: 13 additions & 15 deletions multicluster/apis/multicluster/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,16 @@ func (r *LeaderClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
if !errors.IsNotFound(err) {
return ctrl.Result{}, err
}
klog.InfoS("Received ClusterSet delete", "config", klog.KObj(clusterSet))
for _, removedMember := range r.clusterSetConfig.Spec.Members {
r.StatusManager.RemoveMember(common.ClusterID(removedMember.ClusterID))
}
klog.InfoS("Received ClusterSet delete", "clusterset", klog.KObj(clusterSet))
r.StatusManager.CleanupMembers()
r.clusterSetConfig = nil
r.clusterID = common.InvalidClusterID
r.clusterSetID = common.InvalidClusterSetID

return ctrl.Result{}, nil
}

klog.InfoS("Received ClusterSet add/update", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet add/update", "clusterset", klog.KObj(clusterSet))

// Handle create or update
// If create, make sure the local ClusterClaim is part of the leader config
Expand Down Expand Up @@ -175,8 +173,8 @@ func (r *LeaderClusterSetReconciler) runBackgroundTasks() {
// statues across all clusters. Message will be empty and Reason
// will be "NoReadyCluster"
func (r *LeaderClusterSetReconciler) updateStatus() {
defer r.mutex.Unlock()
r.mutex.Lock()
defer r.mutex.Unlock()

if r.clusterSetConfig == nil {
// Nothing to do.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,7 @@ func TestLeaderClusterSetDelete(t *testing.T) {
err = fakeRemoteClient.Delete(context.TODO(), clusterSet)
assert.Equal(t, nil, err)

mockStatusManager.EXPECT().RemoveMember(common.ClusterID("east"))
mockStatusManager.EXPECT().RemoveMember(common.ClusterID("west"))
mockStatusManager.EXPECT().CleanupMembers()

req := ctrl.Request{
NamespacedName: types.NamespacedName{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (r *MemberClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
if !apierrors.IsNotFound(err) {
return ctrl.Result{}, err
}
klog.InfoS("Received ClusterSet delete", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet delete", "clusterset", klog.KObj(clusterSet))
stopErr := r.remoteCommonAreaManager.Stop()
r.remoteCommonAreaManager = nil
r.clusterSetConfig = nil
Expand All @@ -96,7 +96,7 @@ func (r *MemberClusterSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, stopErr
}

klog.InfoS("Received ClusterSet add/update", "config", klog.KObj(clusterSet))
klog.InfoS("Received ClusterSet add/update", "clusterset", klog.KObj(clusterSet))

// Handle create or update
if r.clusterSetConfig == nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ type MemberClusterAnnounceReconciler struct {
}

type MemberClusterStatusManager interface {
AddMember(MemberId common.ClusterID)
RemoveMember(MemberId common.ClusterID)
AddMember(memberID common.ClusterID)
RemoveMember(memberID common.ClusterID)
CleanupMembers()

GetMemberClusterStatuses() []multiclusterv1alpha1.ClusterStatus
}
Expand Down Expand Up @@ -106,8 +107,8 @@ func (r *MemberClusterAnnounceReconciler) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, client.IgnoreNotFound(err)
}

defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

if data, ok := r.timerData[common.ClusterID(memberAnnounce.ClusterID)]; ok {
klog.V(2).InfoS("Reset lastUpdateTime", "Cluster", memberAnnounce.ClusterID)
Expand Down Expand Up @@ -163,20 +164,20 @@ func (r *MemberClusterAnnounceReconciler) SetupWithManager(mgr ctrl.Manager) err
}

func (r *MemberClusterAnnounceReconciler) processMCSStatus() {
defer r.mapLock.Unlock()
r.mapLock.Lock()
defer r.mapLock.Unlock()

for member, data := range r.timerData {
if data.lastUpdateTime.IsZero() {
// Member has never connected to the local cluster, no status update
continue
}
status := r.memberStatus[member]
// Check if the member has connected atleast once in the last 3 intervals.
// Check if the member has connected at least once in the last 3 intervals.
duration := time.Since(data.lastUpdateTime)
klog.V(2).InfoS("Timer processing", "Cluster", member, "duration", duration)
if duration <= ConnectionTimeout {
// Member has updated MemberClusterStatus atleast once in the last 3 intervals.
// Member has updated MemberClusterStatus at least once in the last 3 intervals.
// If last status is not connected, then update the status.
for index := range status.Conditions {
condition := &status.Conditions[index]
Expand Down Expand Up @@ -232,10 +233,10 @@ func (r *MemberClusterAnnounceReconciler) processMCSStatus() {

/******************************* MemberClusterStatusManager methods *******************************/

func (r *MemberClusterAnnounceReconciler) AddMember(MemberId common.ClusterID) {
defer r.mapLock.Unlock()
func (r *MemberClusterAnnounceReconciler) AddMember(memberID common.ClusterID) {
r.mapLock.Lock()
if _, ok := r.memberStatus[MemberId]; ok {
defer r.mapLock.Unlock()
if _, ok := r.memberStatus[memberID]; ok {
// already present
return
}
Expand All @@ -256,30 +257,40 @@ func (r *MemberClusterAnnounceReconciler) AddMember(MemberId common.ClusterID) {
Reason: ReasonNeverConnected,
})

r.memberStatus[MemberId] = &multiclusterv1alpha1.ClusterStatus{ClusterID: string(MemberId),
r.memberStatus[memberID] = &multiclusterv1alpha1.ClusterStatus{ClusterID: string(memberID),
Conditions: conditions}

r.timerData[MemberId] = &timerData{connected: false, lastUpdateTime: time.Time{}}
r.timerData[memberID] = &timerData{connected: false, lastUpdateTime: time.Time{}}
klog.InfoS("new member is added", "member", memberID)
}

func (r *MemberClusterAnnounceReconciler) RemoveMember(MemberId common.ClusterID) {
defer r.mapLock.Unlock()
func (r *MemberClusterAnnounceReconciler) RemoveMember(memberID common.ClusterID) {
r.mapLock.Lock()
defer r.mapLock.Unlock()

if _, ok := r.memberStatus[MemberId]; ok {
delete(r.memberStatus, MemberId)
if _, ok := r.memberStatus[memberID]; ok {
delete(r.memberStatus, memberID)
return
}

if _, ok := r.timerData[MemberId]; ok {
delete(r.timerData, MemberId)
if _, ok := r.timerData[memberID]; ok {
klog.InfoS("remove members", "member", memberID)
delete(r.timerData, memberID)
return
}
}

func (r *MemberClusterAnnounceReconciler) GetMemberClusterStatuses() []multiclusterv1alpha1.ClusterStatus {
func (r *MemberClusterAnnounceReconciler) CleanupMembers() {
r.mapLock.Lock()
defer r.mapLock.Unlock()

r.memberStatus = make(map[common.ClusterID]*multiclusterv1alpha1.ClusterStatus)
r.timerData = make(map[common.ClusterID]*timerData)
}

func (r *MemberClusterAnnounceReconciler) GetMemberClusterStatuses() []multiclusterv1alpha1.ClusterStatus {
r.mapLock.Lock()
defer r.mapLock.Unlock()

status := make([]multiclusterv1alpha1.ClusterStatus, len(r.memberStatus))

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion multicluster/hack/update-codegen-dockerized.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ GOPATH=`go env GOPATH`
ANTREA_PKG="antrea.io/antrea"

cd multicluster
rm -rf pkg/client/{clientset,informers,listers}

function reset_year_change {
set +x
Expand Down Expand Up @@ -60,5 +59,8 @@ $GOPATH/bin/informer-gen \
--output-package "${ANTREA_PKG}/multicluster/pkg/client/informers" \
--go-header-file hack/boilerplate.go.txt

go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.9.0
$GOPATH/bin/controller-gen object:headerFile="hack/boilerplate.go.txt",year=$(date "+%Y") paths="./..."

cd ..
reset_year_change
Loading

0 comments on commit 67636f7

Please sign in to comment.