Skip to content

Commit

Permalink
Merge pull request kubernetes-sigs#8358 from ykakarap/unitialized-wor…
Browse files Browse the repository at this point in the history
…ker-nodes-only

🐛 set uninitialized taint only on worker nodes
  • Loading branch information
k8s-ci-robot committed Mar 24, 2023
2 parents 9f45df1 + 1bb9bca commit 9e48879
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 176 deletions.
71 changes: 5 additions & 66 deletions bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,24 +64,6 @@ const (
KubeadmConfigControllerName = "kubeadmconfig-controller"
)

var (
// controlPlaneTaint is the taint that kubeadm applies to the control plane nodes
// for Kubernetes version >= v1.24.0.
// The values are copied from kubeadm codebase.
controlPlaneTaint = corev1.Taint{
Key: "node-role.kubernetes.io/control-plane",
Effect: corev1.TaintEffectNoSchedule,
}

// oldControlPlaneTaint is the taint that kubeadm applies to the control plane nodes
// for Kubernetes version < v1.25.0.
// The values are copied from kubeadm codebase.
oldControlPlaneTaint = corev1.Taint{
Key: "node-role.kubernetes.io/master",
Effect: corev1.TaintEffectNoSchedule,
}
)

const (
// DefaultTokenTTL is the default TTL used for tokens.
DefaultTokenTTL = 15 * time.Minute
Expand Down Expand Up @@ -432,14 +414,7 @@ func (r *KubeadmConfigReconciler) handleClusterNotInitialized(ctx context.Contex
}
}

// Add the node uninitialized taint to the list of taints.
// DeepCopy the InitConfiguration to prevent updating the actual KubeadmConfig.
// Do not modify the KubeadmConfig in etcd as this is a temporary taint that will be dropped after the node
// is initialized by ClusterAPI.
initConfiguration := scope.Config.Spec.InitConfiguration.DeepCopy()
addNodeUninitializedTaint(&initConfiguration.NodeRegistration, true, parsedVersion)

initdata, err := kubeadmtypes.MarshalInitConfigurationForVersion(initConfiguration, parsedVersion)
initdata, err := kubeadmtypes.MarshalInitConfigurationForVersion(scope.Config.Spec.InitConfiguration, parsedVersion)
if err != nil {
scope.Error(err, "Failed to marshal init configuration")
return ctrl.Result{}, err
Expand Down Expand Up @@ -580,7 +555,9 @@ func (r *KubeadmConfigReconciler) joinWorker(ctx context.Context, scope *Scope)
// Do not modify the KubeadmConfig in etcd as this is a temporary taint that will be dropped after the node
// is initialized by ClusterAPI.
joinConfiguration := scope.Config.Spec.JoinConfiguration.DeepCopy()
addNodeUninitializedTaint(&joinConfiguration.NodeRegistration, false, parsedVersion)
if !hasTaint(joinConfiguration.NodeRegistration.Taints, clusterv1.NodeUninitializedTaint) {
joinConfiguration.NodeRegistration.Taints = append(joinConfiguration.NodeRegistration.Taints, clusterv1.NodeUninitializedTaint)
}

joinData, err := kubeadmtypes.MarshalJoinConfigurationForVersion(joinConfiguration, parsedVersion)
if err != nil {
Expand Down Expand Up @@ -688,14 +665,7 @@ func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *S
return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion)
}

// Add the node uninitialized taint to the list of taints.
// DeepCopy the JoinConfiguration to prevent updating the actual KubeadmConfig.
// Do not modify the KubeadmConfig in etcd as this is a temporary taint that will be dropped after the node
// is initialized by ClusterAPI.
joinConfiguration := scope.Config.Spec.JoinConfiguration.DeepCopy()
addNodeUninitializedTaint(&joinConfiguration.NodeRegistration, true, parsedVersion)

joinData, err := kubeadmtypes.MarshalJoinConfigurationForVersion(joinConfiguration, parsedVersion)
joinData, err := kubeadmtypes.MarshalJoinConfigurationForVersion(scope.Config.Spec.JoinConfiguration, parsedVersion)
if err != nil {
scope.Error(err, "Failed to marshal join configuration")
return ctrl.Result{}, err
Expand Down Expand Up @@ -1105,37 +1075,6 @@ func (r *KubeadmConfigReconciler) ensureBootstrapSecretOwnersRef(ctx context.Con
return nil
}

// addNodeUninitializedTaint adds the NodeUninitializedTaint to the nodeRegistration.
// Note: If isControlPlane is true then it also adds the control plane taint if the initial set of taints is nil.
// This is to ensure consistency with kubeadm's defaulting behavior.
func addNodeUninitializedTaint(nodeRegistration *bootstrapv1.NodeRegistrationOptions, isControlPlane bool, kubernetesVersion semver.Version) {
var taints []corev1.Taint
taints = nodeRegistration.Taints
if hasTaint(taints, clusterv1.NodeUninitializedTaint) {
return
}

// For a control plane, kubeadm adds the default control plane taint if the provided taints are nil.
// Since we are adding the uninitialized taint we also have to add the default taints kubeadm would have added if
// the taints were nil.
if isControlPlane && taints == nil {
// Note: Kubeadm uses a different default control plane taint depending on the kubernetes version.
// Ref: https://github.com/kubernetes/kubeadm/issues/2200
if kubernetesVersion.LT(semver.MustParse("1.24.0")) {
taints = []corev1.Taint{oldControlPlaneTaint}
} else if kubernetesVersion.GTE(semver.MustParse("1.24.0")) && kubernetesVersion.LT(semver.MustParse("1.25.0")) {
taints = []corev1.Taint{
oldControlPlaneTaint,
controlPlaneTaint,
}
} else {
taints = []corev1.Taint{controlPlaneTaint}
}
}
taints = append(taints, clusterv1.NodeUninitializedTaint)
nodeRegistration.Taints = taints
}

func hasTaint(taints []corev1.Taint, targetTaint corev1.Taint) bool {
for _, taint := range taints {
if taint.MatchTaint(&targetTaint) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"testing"
"time"

"github.com/blang/semver"
ignition "github.com/flatcar/ignition/config/v2_3"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -2194,112 +2193,6 @@ func TestKubeadmConfigReconciler_ResolveUsers(t *testing.T) {
}
}

func TestAddNodeUninitializedTaint(t *testing.T) {
dummyTaint := corev1.Taint{
Key: "dummy-taint",
Value: "",
Effect: corev1.TaintEffectNoSchedule,
}

tests := []struct {
name string
nodeRegistration *bootstrapv1.NodeRegistrationOptions
kubernetesVersion semver.Version
isControlPlane bool
wantTaints []corev1.Taint
}{
{
name: "for control plane with version < v1.24.0, if taints is nil it should add the uninitialized and the master taint",
nodeRegistration: &bootstrapv1.NodeRegistrationOptions{
Taints: nil,
},
kubernetesVersion: semver.MustParse("1.23.0"),
isControlPlane: true,
wantTaints: []corev1.Taint{
oldControlPlaneTaint,
clusterv1.NodeUninitializedTaint,
},
},
{
name: "for control plane with version >= v1.24.0 and < v1.25.0, if taints is nil it should add the uninitialized, control-plane and the master taints",
nodeRegistration: &bootstrapv1.NodeRegistrationOptions{
Taints: nil,
},
kubernetesVersion: semver.MustParse("1.24.5"),
isControlPlane: true,
wantTaints: []corev1.Taint{
oldControlPlaneTaint,
controlPlaneTaint,
clusterv1.NodeUninitializedTaint,
},
},
{
name: "for control plane with version >= v1.25.0, if taints is nil it should add the uninitialized and the control-plane taint",
nodeRegistration: &bootstrapv1.NodeRegistrationOptions{
Taints: nil,
},
kubernetesVersion: semver.MustParse("1.25.0"),
isControlPlane: true,
wantTaints: []corev1.Taint{
controlPlaneTaint,
clusterv1.NodeUninitializedTaint,
},
},
{
name: "for control plane, if taints is not nil it should only add the uninitialized taint",
nodeRegistration: &bootstrapv1.NodeRegistrationOptions{
Taints: []corev1.Taint{},
},
kubernetesVersion: semver.MustParse("1.26.0"),
isControlPlane: true,
wantTaints: []corev1.Taint{
clusterv1.NodeUninitializedTaint,
},
},
{
name: "for control plane, if it already has some taints it should add the uninitialized taint",
nodeRegistration: &bootstrapv1.NodeRegistrationOptions{
Taints: []corev1.Taint{dummyTaint},
},
kubernetesVersion: semver.MustParse("1.26.0"),
isControlPlane: true,
wantTaints: []corev1.Taint{
dummyTaint,
clusterv1.NodeUninitializedTaint,
},
},
{
name: "for worker, it should add the uninitialized taint",
nodeRegistration: &bootstrapv1.NodeRegistrationOptions{},
kubernetesVersion: semver.MustParse("1.26.0"),
isControlPlane: false,
wantTaints: []corev1.Taint{
clusterv1.NodeUninitializedTaint,
},
},
{
name: "for worker, if it already has some taints it should add the uninitialized taint",
nodeRegistration: &bootstrapv1.NodeRegistrationOptions{
Taints: []corev1.Taint{dummyTaint},
},
kubernetesVersion: semver.MustParse("1.26.0"),
isControlPlane: false,
wantTaints: []corev1.Taint{
dummyTaint,
clusterv1.NodeUninitializedTaint,
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
addNodeUninitializedTaint(tt.nodeRegistration, tt.isControlPlane, tt.kubernetesVersion)
g.Expect(tt.nodeRegistration.Taints).To(Equal(tt.wantTaints))
})
}
}

// test utils.

// newWorkerMachineForCluster returns a Machine with the passed Cluster's information and a pre-configured name.
Expand Down
2 changes: 1 addition & 1 deletion docs/book/src/developer/providers/bootstrap.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ A bootstrap provider's bootstrap data must create `/run/cluster-api/bootstrap-su

## Taint Nodes at creation

A bootstrap provider can optionally taint nodes at creation with `node.cluster.x-k8s.io/uninitialized:NoSchedule`.
A bootstrap provider can optionally taint worker nodes at creation with `node.cluster.x-k8s.io/uninitialized:NoSchedule`.
This taint is used to prevent workloads to be scheduled on Nodes before the node is initialized by Cluster API.
As of today the Node initialization consists of syncing labels from Machines to Nodes. Once the labels have been
initially synced the taint is removed form the Node.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,20 +101,23 @@ Kubernetes supports both equality and inequality requirements in label selection

In an inequality based selection, the user wants to place a workload on node(s) that do not contain a specific label (e.g. Node.Labels not contain `my.prefix/foo=bar`). The case is potentially problematic because it relies on the absence of a label and this can occur if the pod scheduler runs during the delay interval.

One way to address this is to use kubelet's `--register-with-taints` flag. Newly minted nodes can be tainted via the taint `node.cluster.x-k8s.io=uninitialized:NoSchedule`. Assuming workloads don't have this specific toleration, then nothing should be scheduled. KubeadmConfigTemplate provides the means to set taints on nodes (see JoinConfiguration.NodeRegistration.Taints).
One way to address this is to use kubelet's `--register-with-taints` flag. Newly minted nodes can be tainted via the taint `node.cluster.x-k8s.io/uninitialized:NoSchedule`. Assuming workloads don't have this specific toleration, then nothing should be scheduled. KubeadmConfigTemplate provides the means to set taints on nodes (see JoinConfiguration.NodeRegistration.Taints).

The process of tainting the nodes, can be carried out by the user and can be documented as follows:

```
If you utilize inequality based selection for workload placement, to prevent unintended scheduling of pods during the initial node startup phase, it is recommend that you specify the following taint in your KubeadmConfigTemplate:
`node.cluster.x-k8s.io=uninitialized:NoSchedule`
`node.cluster.x-k8s.io/uninitialized:NoSchedule`
```

After the node has come up and the machine controller has applied the labels, the machine controller will also remove this specific taint if it's present.

During the implementation we will consider also automating the insertion of the taint via CABPK in order to simplify UX;
in this case, the new behaviour should be documented in the contract as optional requirement for bootstrap providers.

The `node.cluster.x-k8s.io/uninitialized:NoSchedule` taint should only be applied on the worker nodes. It should not be applied on the control plane nodes as it could prevent other components like CPI from initializing which will block cluster creation.


## Alternatives

### Use KubeadmConfigTemplate capabilities
Expand Down

0 comments on commit 9e48879

Please sign in to comment.