diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
index 81ba1d1fa52a..183f99c1e288 100644
--- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
+++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
@@ -31,6 +31,8 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
+	bootstrapapi "k8s.io/cluster-bootstrap/token/api"
+	bootstrapsecretutil "k8s.io/cluster-bootstrap/util/secrets"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -272,7 +274,7 @@ func (r *KubeadmConfigReconciler) reconcile(ctx context.Context, scope *Scope, c
 		// If the BootstrapToken has been generated for a join but the config owner has no nodeRefs,
 		// this indicates that the node has not yet joined and the token in the join config has not
 		// been consumed and it may need a refresh.
-		return r.refreshBootstrapToken(ctx, config, cluster)
+		return r.refreshBootstrapTokenIfNeeded(ctx, config, cluster)
 	}
 	if configOwner.IsMachinePool() {
 		// If the BootstrapToken has been generated and infrastructure is ready but the configOwner is a MachinePool,
@@ -310,7 +312,7 @@ func (r *KubeadmConfigReconciler) reconcile(ctx context.Context, scope *Scope, c
 	return r.joinWorker(ctx, scope)
 }
 
-func (r *KubeadmConfigReconciler) refreshBootstrapToken(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster) (ctrl.Result, error) {
+func (r *KubeadmConfigReconciler) refreshBootstrapTokenIfNeeded(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster) (ctrl.Result, error) {
 	log := ctrl.LoggerFrom(ctx)
 	token := config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token
 
@@ -319,12 +321,42 @@ func (r *KubeadmConfigReconciler) refreshBootstrapToken(ctx context.Context, con
 		return ctrl.Result{}, err
 	}
 
-	log.Info("Refreshing token until the infrastructure has a chance to consume it")
-	if err := refreshToken(ctx, remoteClient, token, r.TokenTTL); err != nil {
+	secret, err := getToken(ctx, remoteClient, token)
+	if err != nil {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to get bootstrap token secret in order to refresh it")
+	}
+	log = log.WithValues("Secret", klog.KObj(secret))
+
+	secretExpiration := bootstrapsecretutil.GetData(secret, bootstrapapi.BootstrapTokenExpirationKey)
+	if secretExpiration == "" {
+		log.Info(fmt.Sprintf("Token has no valid value for %s, writing new expiration timestamp", bootstrapapi.BootstrapTokenExpirationKey))
+	} else {
+		// Assuming UTC, since we create the label value with that timezone
+		expiration, err := time.Parse(time.RFC3339, secretExpiration)
+		if err != nil {
+			return ctrl.Result{}, errors.Wrapf(err, "can't parse expiration time of bootstrap token")
+		}
+
+		now := time.Now().UTC()
+		skipTokenRefreshIfExpiringAfter := now.Add(r.skipTokenRefreshIfExpiringAfter())
+		if expiration.After(skipTokenRefreshIfExpiringAfter) {
+			log.V(3).Info("Token needs no refresh", "tokenExpiresInSeconds", expiration.Sub(now).Seconds())
+			return ctrl.Result{
+				RequeueAfter: r.tokenCheckRefreshOrRotationInterval(),
+			}, nil
+		}
+	}
+
+	// Extend TTL for existing token
+	newExpiration := time.Now().UTC().Add(r.TokenTTL).Format(time.RFC3339)
+	secret.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(newExpiration)
+	log.Info("Refreshing token until the infrastructure has a chance to consume it", "oldExpiration", secretExpiration, "newExpiration", newExpiration)
+	err = remoteClient.Update(ctx, secret)
+	if err != nil {
 		return ctrl.Result{}, errors.Wrapf(err, "failed to refresh bootstrap token")
 	}
 	return ctrl.Result{
-		RequeueAfter: r.TokenTTL / 2,
+		RequeueAfter: r.tokenCheckRefreshOrRotationInterval(),
 	}, nil
 }
 
@@ -355,7 +387,7 @@ func (r *KubeadmConfigReconciler) rotateMachinePoolBootstrapToken(ctx context.Co
 		return r.joinWorker(ctx, scope)
 	}
 	return ctrl.Result{
-		RequeueAfter: r.TokenTTL / 3,
+		RequeueAfter: r.tokenCheckRefreshOrRotationInterval(),
 	}, nil
 }
 
@@ -632,7 +664,9 @@ func (r *KubeadmConfigReconciler) joinWorker(ctx context.Context, scope *Scope)
 		scope.Error(err, "Failed to store bootstrap data")
 		return ctrl.Result{}, err
 	}
-	return ctrl.Result{}, nil
+
+	// Ensure reconciling this object again so we keep refreshing the bootstrap token until it is consumed
+	return ctrl.Result{RequeueAfter: r.tokenCheckRefreshOrRotationInterval()}, nil
 }
 
 func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *Scope) (ctrl.Result, error) {
@@ -737,7 +771,8 @@ func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *S
 		return ctrl.Result{}, err
 	}
 
-	return ctrl.Result{}, nil
+	// Ensure reconciling this object again so we keep refreshing the bootstrap token until it is consumed
+	return ctrl.Result{RequeueAfter: r.tokenCheckRefreshOrRotationInterval()}, nil
 }
 
 // resolveFiles maps .Spec.Files into cloudinit.Files, resolving any object references
@@ -817,6 +852,27 @@ func (r *KubeadmConfigReconciler) resolveSecretPasswordContent(ctx context.Conte
 	return data, nil
 }
 
+// skipTokenRefreshIfExpiringAfter returns a duration. If the token's expiry timestamp is after
+// `now + skipTokenRefreshIfExpiringAfter()`, it does not yet need a refresh.
+func (r *KubeadmConfigReconciler) skipTokenRefreshIfExpiringAfter() time.Duration {
+	// Choose according to how often reconciliation is "woken up" by `tokenCheckRefreshOrRotationInterval`.
+	// Reconciliation should get triggered at least two times, i.e. have two chances to refresh the token (in case of
+	// one temporary failure), while the token is not refreshed.
+	return r.TokenTTL * 5 / 6
+}
+
+// tokenCheckRefreshOrRotationInterval defines when to trigger a reconciliation loop again to refresh or rotate a token.
+func (r *KubeadmConfigReconciler) tokenCheckRefreshOrRotationInterval() time.Duration {
+	// This interval defines how often the reconciler should get triggered.
+	//
+	// `r.TokenTTL / 3` means reconciliation gets triggered at least 3 times within the expiry time of the token. The
+	// third call may be too late, so the first/second call have a chance to extend the expiry (refresh/rotate),
+	// allowing for one temporary failure.
+	//
+	// Related to `skipTokenRefreshIfExpiringAfter` and also token rotation (which is different from refreshing).
+	return r.TokenTTL / 3
+}
+
 // ClusterToKubeadmConfigs is a handler.ToRequestsFunc to be used to enqueue
 // requests for reconciliation of KubeadmConfigs.
 func (r *KubeadmConfigReconciler) ClusterToKubeadmConfigs(ctx context.Context, o client.Object) []ctrl.Request {
diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go
index ea814e14b5b7..e1d542f2651f 100644
--- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go
+++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go
@@ -508,7 +508,7 @@ func TestKubeadmConfigReconciler_Reconcile_GenerateCloudConfigData(t *testing.T)
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 	}
 
@@ -570,7 +570,7 @@ func TestKubeadmConfigReconciler_Reconcile_ErrorIfJoiningControlPlaneHasInvalidC
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 	}
 
@@ -693,7 +693,7 @@ func TestReconcileIfJoinCertificatesAvailableConditioninNodesAndControlPlaneIsRe
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 	}
 
@@ -771,7 +771,7 @@ func TestReconcileIfJoinNodePoolsAndControlPlaneIsReady(t *testing.T) {
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 	}
 
@@ -872,7 +872,7 @@ func TestBootstrapDataFormat(t *testing.T) {
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 	}
 	request := ctrl.Request{
@@ -953,7 +953,7 @@ func TestKubeadmConfigSecretCreatedStatusNotPatched(t *testing.T) {
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 	}
 	request := ctrl.Request{
@@ -1028,12 +1028,13 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	objects = append(objects, createSecrets(t, cluster, initConfig)...)
 
 	myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}, &clusterv1.Machine{}).Build()
+	remoteClient := fake.NewClientBuilder().Build()
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 		TokenTTL:            DefaultTokenTTL,
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, remoteClient, remoteClient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 	}
 	request := ctrl.Request{
 		NamespacedName: client.ObjectKey{
@@ -1043,8 +1044,7 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	}
 	result, err := k.Reconcile(ctx, request)
 	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(result.Requeue).To(BeFalse())
-	g.Expect(result.RequeueAfter).To(Equal(time.Duration(0)))
+	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 
 	cfg, err := getKubeadmConfig(myclient, "worker-join-cfg", metav1.NamespaceDefault)
 	g.Expect(err).ToNot(HaveOccurred())
@@ -1060,8 +1060,7 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	}
 	result, err = k.Reconcile(ctx, request)
 	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(result.Requeue).To(BeFalse())
-	g.Expect(result.RequeueAfter).To(Equal(time.Duration(0)))
+	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 
 	cfg, err = getKubeadmConfig(myclient, "control-plane-join-cfg", metav1.NamespaceDefault)
 	g.Expect(err).ToNot(HaveOccurred())
@@ -1070,18 +1069,52 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	g.Expect(cfg.Status.ObservedGeneration).NotTo(BeNil())
 
 	l := &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(l.Items).To(HaveLen(2))
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
+	g.Expect(l.Items).To(HaveLen(2)) // control plane vs. worker
 
-	// ensure that the token is refreshed...
+ t.Log("Ensure that the token secret is not updated while it's still fresh") tokenExpires := make([][]byte, len(l.Items)) for i, item := range l.Items { tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey] } - <-time.After(1 * time.Second) + for _, req := range []ctrl.Request{ + { + NamespacedName: client.ObjectKey{ + Namespace: metav1.NamespaceDefault, + Name: "worker-join-cfg", + }, + }, + { + NamespacedName: client.ObjectKey{ + Namespace: metav1.NamespaceDefault, + Name: "control-plane-join-cfg", + }, + }, + } { + result, err := k.Reconcile(ctx, req) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3)) + } + + l = &corev1.SecretList{} + g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed()) + g.Expect(l.Items).To(HaveLen(2)) + + for i, item := range l.Items { + // No refresh should have happened since no time passed and the token is therefore still fresh + g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeTrue()) + } + + t.Log("Ensure that the token secret is updated if expiration time is soon") + + for i, item := range l.Items { + // Simulate that expiry time is only TTL/2 from now. This should trigger a refresh. + item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL / 2).Format(time.RFC3339)) + g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed()) + tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey] + } for _, req := range []ctrl.Request{ { @@ -1099,20 +1132,28 @@ func TestBootstrapTokenTTLExtension(t *testing.T) { } { result, err := k.Reconcile(ctx, req) g.Expect(err).ToNot(HaveOccurred()) - g.Expect(result.RequeueAfter).NotTo(BeNumerically(">=", k.TokenTTL)) + g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3)) } l = &corev1.SecretList{} - err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem))) - g.Expect(err).ToNot(HaveOccurred()) + g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed()) g.Expect(l.Items).To(HaveLen(2)) for i, item := range l.Items { + // Refresh should have happened since expiration is soon g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeFalse()) tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey] } - // ...the infrastructure is marked "ready", but token should still be refreshed... + t.Log("If infrastructure is marked ready, the token should still be refreshed") + + for i, item := range l.Items { + // Simulate that expiry time is only TTL/2 from now. This should trigger a refresh. 
+		item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL / 2).Format(time.RFC3339))
+		g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed())
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
 
 	for _, req := range []ctrl.Request{
 		{
@@ -1099,20 +1132,28 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	} {
 		result, err := k.Reconcile(ctx, req)
 		g.Expect(err).ToNot(HaveOccurred())
-		g.Expect(result.RequeueAfter).NotTo(BeNumerically(">=", k.TokenTTL))
+		g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 	}
 
 	l = &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
 	g.Expect(l.Items).To(HaveLen(2))
 
 	for i, item := range l.Items {
+		// Refresh should have happened since expiration is soon
 		g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeFalse())
 		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
 	}
 
-	// ...the infrastructure is marked "ready", but token should still be refreshed...
+	t.Log("If infrastructure is marked ready, the token should still be refreshed")
+
+	for i, item := range l.Items {
+		// Simulate that expiry time is only TTL/2 from now. This should trigger a refresh.
+		item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL / 2).Format(time.RFC3339))
+		g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed())
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
+
 	patchHelper, err := patch.NewHelper(workerMachine, myclient)
 	g.Expect(err).ShouldNot(HaveOccurred())
 	workerMachine.Status.InfrastructureReady = true
 	g.Expect(patchHelper.Patch(ctx, workerMachine)).To(Succeed())
@@ -1123,8 +1164,6 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	controlPlaneJoinMachine.Status.InfrastructureReady = true
 	g.Expect(patchHelper.Patch(ctx, controlPlaneJoinMachine)).To(Succeed())
 
-	<-time.After(1 * time.Second)
-
 	for _, req := range []ctrl.Request{
 		{
 			NamespacedName: client.ObjectKey{
@@ -1141,20 +1180,28 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	} {
 		result, err := k.Reconcile(ctx, req)
 		g.Expect(err).ToNot(HaveOccurred())
-		g.Expect(result.RequeueAfter).NotTo(BeNumerically(">=", k.TokenTTL))
+		g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 	}
 
 	l = &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
 	g.Expect(l.Items).To(HaveLen(2))
 
 	for i, item := range l.Items {
+		// Refresh should have happened since expiration is soon, even if infrastructure is ready
 		g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeFalse())
 		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
 	}
 
-	// ...until the Nodes have actually joined the cluster and we get a nodeRef
+	t.Log("When the Nodes have actually joined the cluster and we get a nodeRef, no more refresh should happen")
+
+	for i, item := range l.Items {
+		// Simulate that expiry time is only TTL/2 from now. This would normally trigger a refresh.
+		item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL / 2).Format(time.RFC3339))
+		g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed())
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
+
 	patchHelper, err = patch.NewHelper(workerMachine, myclient)
 	g.Expect(err).ShouldNot(HaveOccurred())
 	workerMachine.Status.NodeRef = &corev1.ObjectReference{
@@ -1173,8 +1220,6 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	}
 	g.Expect(patchHelper.Patch(ctx, controlPlaneJoinMachine)).To(Succeed())
 
-	<-time.After(1 * time.Second)
-
 	for _, req := range []ctrl.Request{
 		{
 			NamespacedName: client.ObjectKey{
@@ -1196,8 +1241,7 @@ func TestBootstrapTokenTTLExtension(t *testing.T) {
 	}
 
 	l = &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
 	g.Expect(l.Items).To(HaveLen(2))
 
 	for i, item := range l.Items {
@@ -1230,12 +1274,13 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) {
 	objects = append(objects, createSecrets(t, cluster, initConfig)...)
 	myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}, &expv1.MachinePool{}).Build()
+	remoteClient := fake.NewClientBuilder().Build()
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
-		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 		KubeadmInitLock:     &myInitLocker{},
 		TokenTTL:            DefaultTokenTTL,
+		Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, remoteClient, remoteClient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 	}
 	request := ctrl.Request{
 		NamespacedName: client.ObjectKey{
@@ -1245,8 +1290,7 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) {
 	}
 	result, err := k.Reconcile(ctx, request)
 	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(result.Requeue).To(BeFalse())
-	g.Expect(result.RequeueAfter).To(Equal(time.Duration(0)))
+	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 
 	cfg, err := getKubeadmConfig(myclient, "workerpool-join-cfg", metav1.NamespaceDefault)
 	g.Expect(err).ToNot(HaveOccurred())
@@ -1255,71 +1299,89 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) {
 	g.Expect(cfg.Status.ObservedGeneration).NotTo(BeNil())
 
 	l := &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
 	g.Expect(l.Items).To(HaveLen(1))
 
-	// ensure that the token is refreshed...
+	t.Log("Ensure that the token secret is not updated while it's still fresh")
 	tokenExpires := make([][]byte, len(l.Items))
 	for i, item := range l.Items {
 		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
 	}
 
-	<-time.After(1 * time.Second)
+	result, err = k.Reconcile(ctx, request)
+	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 
-	for _, req := range []ctrl.Request{
-		{
-			NamespacedName: client.ObjectKey{
-				Namespace: metav1.NamespaceDefault,
-				Name:      "workerpool-join-cfg",
-			},
-		},
-	} {
-		result, err := k.Reconcile(ctx, req)
-		g.Expect(err).ToNot(HaveOccurred())
-		g.Expect(result.RequeueAfter).NotTo(BeNumerically(">=", k.TokenTTL))
+	l = &corev1.SecretList{}
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
+	g.Expect(l.Items).To(HaveLen(1))
+
+	for i, item := range l.Items {
+		// No refresh should have happened since no time passed and the token is therefore still fresh
+		g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeTrue())
 	}
 
-	l = &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
+	t.Log("Ensure that the token secret is updated if expiration time is soon")
+
+	for i, item := range l.Items {
+		// Simulate that expiry time is only TTL*3/4 from now. This should trigger a refresh.
+		item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL * 3 / 4).Format(time.RFC3339))
+		g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed())
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
+
+	result, err = k.Reconcile(ctx, request)
 	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
+
+	l = &corev1.SecretList{}
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
+	g.Expect(l.Items).To(HaveLen(1))
 
 	for i, item := range l.Items {
+		// Refresh should have happened since expiration is soon
 		g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeFalse())
 		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
 	}
 
-	// ...the infrastructure is marked "ready", but token should still be refreshed...
+	t.Log("If infrastructure is marked ready, the token should still be refreshed")
+
+	for i, item := range l.Items {
+		// Simulate that expiry time is only TTL*3/4 from now. This should trigger a refresh.
+		item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL * 3 / 4).Format(time.RFC3339))
+		g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed())
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
+
 	patchHelper, err := patch.NewHelper(workerMachinePool, myclient)
 	g.Expect(err).ShouldNot(HaveOccurred())
 	workerMachinePool.Status.InfrastructureReady = true
 	g.Expect(patchHelper.Patch(ctx, workerMachinePool, patch.WithStatusObservedGeneration{})).To(Succeed())
 
-	<-time.After(1 * time.Second)
-
-	request = ctrl.Request{
-		NamespacedName: client.ObjectKey{
-			Namespace: metav1.NamespaceDefault,
-			Name:      "workerpool-join-cfg",
-		},
-	}
 	result, err = k.Reconcile(ctx, request)
 	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(result.RequeueAfter).NotTo(BeNumerically(">=", k.TokenTTL))
+	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 
 	l = &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
 	g.Expect(l.Items).To(HaveLen(1))
 
 	for i, item := range l.Items {
+		// Refresh should have happened since expiration is soon, even if infrastructure is ready
 		g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeFalse())
 		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
 	}
 
-	// ...until all nodes have joined
+	t.Log("When the Nodes have actually joined the cluster and we get a nodeRef, no more refresh should happen")
+
+	for i, item := range l.Items {
+		// Simulate that expiry time is only TTL*3/4 from now. This would normally trigger a refresh.
+		item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL * 3 / 4).Format(time.RFC3339))
+		g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed())
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
+
 	workerMachinePool.Status.NodeRefs = []corev1.ObjectReference{
 		{
 			Kind: "Node",
@@ -1329,32 +1391,26 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) {
 	}
 	g.Expect(patchHelper.Patch(ctx, workerMachinePool, patch.WithStatusObservedGeneration{})).To(Succeed())
 
-	<-time.After(1 * time.Second)
-
-	request = ctrl.Request{
-		NamespacedName: client.ObjectKey{
-			Namespace: metav1.NamespaceDefault,
-			Name:      "workerpool-join-cfg",
-		},
-	}
 	result, err = k.Reconcile(ctx, request)
 	g.Expect(err).ToNot(HaveOccurred())
 	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 
 	l = &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
 	g.Expect(l.Items).To(HaveLen(1))
 
 	for i, item := range l.Items {
 		g.Expect(bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey])).To(BeTrue())
 	}
 
-	// before token expires, it should rotate it
-	tokenExpires[0] = []byte(time.Now().UTC().Add(k.TokenTTL / 5).Format(time.RFC3339))
-	l.Items[0].Data[bootstrapapi.BootstrapTokenExpirationKey] = tokenExpires[0]
-	err = myclient.Update(ctx, &l.Items[0])
-	g.Expect(err).ToNot(HaveOccurred())
+	t.Log("Token must be rotated before it expires")
+
+	for i, item := range l.Items {
+		// Simulate that expiry time is only TTL*4/10 from now. This should trigger rotation.
+		item.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(k.TokenTTL * 4 / 10).Format(time.RFC3339))
+		g.Expect(remoteClient.Update(ctx, &l.Items[i])).To(Succeed())
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
 
 	request = ctrl.Request{
 		NamespacedName: client.ObjectKey{
@@ -1364,12 +1420,11 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) {
 	}
 	result, err = k.Reconcile(ctx, request)
 	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(result.RequeueAfter).To(Equal(time.Duration(0)))
+	g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
 
 	l = &corev1.SecretList{}
-	err = myclient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))
-	g.Expect(err).ToNot(HaveOccurred())
-	g.Expect(l.Items).To(HaveLen(2))
+	g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
+	g.Expect(l.Items).To(HaveLen(2)) // old and new token
 	foundOld := false
 	foundNew := true
 	for _, item := range l.Items {
@@ -1517,7 +1572,7 @@ func TestKubeadmConfigReconciler_Reconcile_DiscoveryReconcileBehaviors(t *testin
 			k := &KubeadmConfigReconciler{
 				Client:              fakeClient,
 				SecretCachingClient: fakeClient,
-				Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: tc.cluster.Name, Namespace: tc.cluster.Namespace}),
+				Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: tc.cluster.Name, Namespace: tc.cluster.Namespace}),
 				KubeadmInitLock:     &myInitLocker{},
 			}
 
@@ -1734,7 +1789,7 @@ func TestKubeadmConfigReconciler_Reconcile_AlwaysCheckCAVerificationUnlessReques
 		reconciler := KubeadmConfigReconciler{
 			Client:              myclient,
 			SecretCachingClient: myclient,
-			Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
+			Tracker:             remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
 			KubeadmInitLock:     &myInitLocker{},
 		}
 
@@ -1927,7 +1982,7 @@ func TestKubeadmConfigReconciler_Reconcile_PatchWhenErrorOccurred(t *testing.T)
 		objects = append(objects, s)
 	}
 
-	myclient := fake.NewClientBuilder().WithObjects(objects...).Build()
+	myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build()
 	k := &KubeadmConfigReconciler{
 		Client:              myclient,
 		SecretCachingClient: myclient,
diff --git a/bootstrap/kubeadm/internal/controllers/token.go b/bootstrap/kubeadm/internal/controllers/token.go
index 7fc2be7586a3..c138b4a6cdd3 100644
--- a/bootstrap/kubeadm/internal/controllers/token.go
+++ b/bootstrap/kubeadm/internal/controllers/token.go
@@ -87,17 +87,6 @@ func getToken(ctx context.Context, c client.Client, token string) (*corev1.Secre
 	return secret, nil
 }
 
-// refreshToken extends the TTL for an existing token.
-func refreshToken(ctx context.Context, c client.Client, token string, ttl time.Duration) error {
-	secret, err := getToken(ctx, c, token)
-	if err != nil {
-		return err
-	}
-	secret.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(ttl).Format(time.RFC3339))
-
-	return c.Update(ctx, secret)
-}
-
 // shouldRotate returns true if an existing token is past half of its TTL and should to be rotated.
 func shouldRotate(ctx context.Context, c client.Client, token string, ttl time.Duration) (bool, error) {
 	secret, err := getToken(ctx, c, token)
diff --git a/controllers/remote/cluster_cache_tracker_fake.go b/controllers/remote/cluster_cache_tracker_fake.go
index ba1627592936..6062b967a637 100644
--- a/controllers/remote/cluster_cache_tracker_fake.go
+++ b/controllers/remote/cluster_cache_tracker_fake.go
@@ -24,7 +24,7 @@ import (
 )
 
 // NewTestClusterCacheTracker creates a new fake ClusterCacheTracker that can be used by unit tests with fake client.
-func NewTestClusterCacheTracker(log logr.Logger, cl client.Client, scheme *runtime.Scheme, objKey client.ObjectKey, watchObjects ...string) *ClusterCacheTracker {
+func NewTestClusterCacheTracker(log logr.Logger, cl client.Client, remoteClient client.Client, scheme *runtime.Scheme, objKey client.ObjectKey, watchObjects ...string) *ClusterCacheTracker {
 	testCacheTracker := &ClusterCacheTracker{
 		log:    log,
 		client: cl,
@@ -35,7 +35,7 @@ func NewTestClusterCacheTracker(log logr.Logger, cl client.Client, scheme *runti
 
 	testCacheTracker.clusterAccessors[objKey] = &clusterAccessor{
 		cache:   nil,
-		client:  cl,
+		client:  remoteClient,
 		watches: sets.Set[string]{}.Insert(watchObjects...),
 	}
 	return testCacheTracker
diff --git a/exp/internal/controllers/machinepool_controller_phases_test.go b/exp/internal/controllers/machinepool_controller_phases_test.go
index f71f7d57d97f..c5a7803029bf 100644
--- a/exp/internal/controllers/machinepool_controller_phases_test.go
+++ b/exp/internal/controllers/machinepool_controller_phases_test.go
@@ -230,7 +230,7 @@ func TestReconcileMachinePoolPhases(t *testing.T) {
 		fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build()
 		r := &MachinePoolReconciler{
 			Client:  fakeClient,
-			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
+			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
 		}
 
 		res, err := r.reconcile(ctx, defaultCluster, machinepool)
@@ -287,7 +287,7 @@ func TestReconcileMachinePoolPhases(t *testing.T) {
 		fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build()
 		r := &MachinePoolReconciler{
 			Client:  fakeClient,
-			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
+			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
 		}
 
 		res, err := r.reconcile(ctx, defaultCluster, machinepool)
@@ -362,7 +362,7 @@ func TestReconcileMachinePoolPhases(t *testing.T) {
 		fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build()
 		r := &MachinePoolReconciler{
 			Client:  fakeClient,
-			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
+			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
 		}
 
 		res, err := r.reconcile(ctx, defaultCluster, machinepool)
@@ -417,7 +417,7 @@ func TestReconcileMachinePoolPhases(t *testing.T) {
 		fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build()
 		r := &MachinePoolReconciler{
 			Client:  fakeClient,
-			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
+			Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}),
 		}
 
 		res, err := r.reconcile(ctx, defaultCluster, machinepool)
@@ -1702,7 +1702,7 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) {
 		fakeClient := fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build()
 		r := &MachinePoolReconciler{
 			Client:   fakeClient,
-			Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
+			Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
 			recorder: record.NewFakeRecorder(32),
 		}
 
@@ -1759,7 +1759,7 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) {
 		fakeClient := fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build()
 		r := &MachinePoolReconciler{
 			Client:   fakeClient,
-			Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
+			Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
 			recorder: record.NewFakeRecorder(32),
 		}
 
@@ -1889,7 +1889,7 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) {
 		fakeClient := fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build()
 		r := &MachinePoolReconciler{
 			Client:   fakeClient,
-			Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
+			Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
 			recorder: record.NewFakeRecorder(32),
 		}
 
diff --git a/internal/controllers/machine/machine_controller_test.go b/internal/controllers/machine/machine_controller_test.go
index c303500eb404..520b61d1c365 100644
--- a/internal/controllers/machine/machine_controller_test.go
+++ b/internal/controllers/machine/machine_controller_test.go
@@ -733,7 +733,7 @@ func TestReconcileRequest(t *testing.T) {
 			r := &Reconciler{
 				Client:                    clientFake,
 				UnstructuredCachingClient: clientFake,
-				Tracker:                   remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
+				Tracker:                   remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
 				ssaCache:                  ssa.NewCache(),
 			}
 
@@ -1003,7 +1003,7 @@ func TestMachineConditions(t *testing.T) {
 				Client:                    clientFake,
 				UnstructuredCachingClient: clientFake,
 				recorder:                  record.NewFakeRecorder(10),
-				Tracker:                   remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
+				Tracker:                   remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}),
 				ssaCache:                  ssa.NewCache(),
 			}
 
@@ -2198,7 +2198,7 @@ func TestNodeDeletion(t *testing.T) {
 			m.Spec.NodeDeletionTimeout = tc.deletionTimeout
 
 			fakeClient := tc.createFakeClient(node, m, cpmachine1)
-			tracker := remote.NewTestClusterCacheTracker(ctrl.Log, fakeClient, fakeScheme, client.ObjectKeyFromObject(&testCluster))
+			tracker := remote.NewTestClusterCacheTracker(ctrl.Log, fakeClient, fakeClient, fakeScheme, client.ObjectKeyFromObject(&testCluster))
 
 			r := &Reconciler{
 				Client: fakeClient,
diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go b/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go
index 93fa8332cab0..0f876aa98371 100644
--- a/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go
+++ b/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go
@@ -2601,7 +2601,7 @@ func TestPatchTargets(t *testing.T) {
 	r := &Reconciler{
 		Client:   cl,
 		recorder: record.NewFakeRecorder(32),
-		Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), cl, scheme.Scheme, client.ObjectKey{Name: clusterName, Namespace: namespace}, "machinehealthcheck-watchClusterNodes"),
+		Tracker:  remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), cl, cl, scheme.Scheme, client.ObjectKey{Name: clusterName, Namespace: namespace}, "machinehealthcheck-watchClusterNodes"),
 	}
 
 	// To make the patch fail, create patchHelper with a different client.