Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add leader-election option #20

Merged
merged 12 commits into from
Aug 1, 2024
14 changes: 14 additions & 0 deletions charts/node-ipam-controller/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,17 @@ Create the name of the service account to use
{{- define "node-ipam-controller.serviceAccountName" -}}
{{- default (include "node-ipam-controller.fullname" .) .Values.serviceAccount.name }}
{{- end }}

{{/*
Name of the Role: the release name followed by "-node-ipam-controller-role".
*/}}
{{- define "node-ipam-controller.roleName" -}}
{{- printf "%s-node-ipam-controller-role" .Release.Name -}}
{{- end -}}

{{/*
Name of the RoleBinding: the release name followed by "-node-ipam-controller-rolebinding".
*/}}
{{- define "node-ipam-controller.roleBindingName" -}}
{{- printf "%s-node-ipam-controller-rolebinding" .Release.Name -}}
{{- end -}}
17 changes: 17 additions & 0 deletions charts/node-ipam-controller/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ spec:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Chart.Name }}
{{- if .Values.leaderElection.enabled }}
args:
- --enable-leader-election=true
{{- end }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
Expand All @@ -46,6 +50,15 @@ spec:
periodSeconds: 10
resources:
{{- toYaml .Values.resources | nindent 12 }}
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand All @@ -54,6 +67,10 @@ spec:
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.topologySpreadConstraints }}
topologySpreadConstraints:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
Expand Down
30 changes: 30 additions & 0 deletions charts/node-ipam-controller/templates/serviceaccount.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,33 @@ subjects:
- kind: ServiceAccount
name: {{ include "node-ipam-controller.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
---
{{- if .Values.leaderElection.enabled -}}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ include "node-ipam-controller.roleName" . }}
mneverov marked this conversation as resolved.
Show resolved Hide resolved
namespace: {{ .Release.Namespace }}
rules:
# Leases in the release namespace may be created, read and listed without a name restriction.
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create","get","list"]
# Updates are limited to the single Lease named "node-ipam-controller".
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["update"]
resourceNames: ["node-ipam-controller"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ include "node-ipam-controller.roleBindingName" . }}
namespace: {{ .Release.Namespace }}
# Binds the Role to the chart's ServiceAccount in the release namespace.
subjects:
- kind: ServiceAccount
name: {{ include "node-ipam-controller.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: Role
name: {{ include "node-ipam-controller.roleName" . }}
apiGroup: rbac.authorization.k8s.io
{{- end -}}
14 changes: 14 additions & 0 deletions charts/node-ipam-controller/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Specifies the replica count for Deployment
# Set leaderElection.enabled to true if you want to run more than 1 replica
replicaCount: 1

leaderElection:
enabled: false

image:
repository: ghcr.io/sigs.k8s.io/node-ipam-controller
pullPolicy: IfNotPresent
Expand Down Expand Up @@ -39,6 +44,15 @@ resources:
memory: 128Mi

nodeSelector: {}

topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app.kubernetes.io/name: node-ipam-controller

tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
Expand Down
32 changes: 25 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package main

import (
"context"
"errors"
"flag"
"fmt"
Expand All @@ -30,10 +31,12 @@ import (
clientset "sigs.k8s.io/node-ipam-controller/pkg/client/clientset/versioned"
informers "sigs.k8s.io/node-ipam-controller/pkg/client/informers/externalversions"
"sigs.k8s.io/node-ipam-controller/pkg/controller/ipam"
"sigs.k8s.io/node-ipam-controller/pkg/leaderelection"
"sigs.k8s.io/node-ipam-controller/pkg/signals"

kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
Expand All @@ -45,14 +48,16 @@ import (

func main() {
var (
apiServerURL string
kubeconfig string
healthProbeAddr string
apiServerURL string
kubeconfig string
healthProbeAddr string
enableLeaderElection bool
)

flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to a kubeconfig. Only required if out-of-cluster.")
flag.StringVar(&apiServerURL, "apiserver", "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. Only required if out-of-cluster.")
flag.StringVar(&healthProbeAddr, "health-probe-address", ":8081", "Specifies the TCP address for the health server to listen on.")
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for the controller manager. Ensures there is only one active controller manager.")
ugur99 marked this conversation as resolved.
Show resolved Hide resolved

c := logsapi.NewLoggingConfiguration()
logsapi.AddGoFlags(c, flag.CommandLine)
Expand All @@ -65,6 +70,7 @@ func main() {
ctx := signals.SetupSignalHandler()
logger := klog.FromContext(ctx)

server := startHealthProbeServer(healthProbeAddr, logger)
ugur99 marked this conversation as resolved.
Show resolved Hide resolved
cfg, err := clientcmd.BuildConfigFromFlags(apiServerURL, kubeconfig)
if err != nil {
logger.Error(err, "failed to build kubeconfig")
Expand All @@ -77,6 +83,22 @@ func main() {
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}

if enableLeaderElection {
logger.Info("Leader election is enabled.")
leaderelection.StartLeaderElection(ctx, kubeClient, func(ctx context.Context) {
runControllers(ctx, kubeClient, cfg, logger)
})
} else {
logger.Info("Leader election is disabled.")
runControllers(ctx, kubeClient, cfg, logger)
}

if err := server.Shutdown(ctx); err != nil {
logger.Error(err, "failed to shut down health server")
}
}

func runControllers(ctx context.Context, kubeClient kubernetes.Interface, cfg *rest.Config, logger klog.Logger) {
cidrClient, err := clientset.NewForConfig(cfg)
if err != nil {
logger.Error(err, "failed to build kubernetes clientset")
Expand Down Expand Up @@ -111,11 +133,7 @@ func main() {
kubeInformerFactory.Start(ctx.Done())
sharedInformerFactory.Start(ctx.Done())

server := startHealthProbeServer(healthProbeAddr, logger)
nodeIpamController.Run(ctx)
if err := server.Shutdown(ctx); err != nil {
logger.Error(err, "failed to shut down health server")
}
}

// startHealthProbeServer starts a web server that has two endpoints `/readyz` and `/healthz` and always responds
Expand Down
63 changes: 63 additions & 0 deletions pkg/leaderelection/leaderelection.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package leaderelection

import (
"context"
"os"
"time"

"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
"k8s.io/klog/v2"
)

// StartLeaderElection starts the leader election process
func StartLeaderElection(ctx context.Context, kubeClient kubernetes.Interface, runFunc func(ctx context.Context)) {
id := os.Getenv("POD_NAME")
if id == "" {
klog.Fatalf("POD_NAME environment variable not set")
}

namespace := os.Getenv("POD_NAMESPACE")
if namespace == "" {
klog.Fatalf("POD_NAMESPACE environment variable not set")
}

rl, err := resourcelock.New(
resourcelock.LeasesResourceLock,
namespace,
"node-ipam-controller",
kubeClient.CoreV1(),
kubeClient.CoordinationV1(),
resourcelock.ResourceLockConfig{
Identity: id,
},
)
if err != nil {
klog.Fatalf("failed to create leader election lock: %v", err)
}

leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
mneverov marked this conversation as resolved.
Show resolved Hide resolved
Lock: rl,
LeaseDuration: 15 * time.Second,
mneverov marked this conversation as resolved.
Show resolved Hide resolved
RenewDeadline: 10 * time.Second,
RetryPeriod: 2 * time.Second,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: func(ctx context.Context) {
klog.Info("Started leading")
mneverov marked this conversation as resolved.
Show resolved Hide resolved
runFunc(ctx)
},
OnStoppedLeading: func() {
klog.Info("Stopped leading")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
mneverov marked this conversation as resolved.
Show resolved Hide resolved
},
OnNewLeader: func(identity string) {
if identity == id {
klog.Info("I am the new leader")
} else {
klog.Infof("New leader elected: %s", identity)
}
},
},
})
}
Loading