Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨Add support to MachinePool to reference template objects #4112

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions exp/controllers/machinepool_controller_noderef.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ package controllers
import (
"context"
"fmt"
"time"

"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/patch"
"time"

ctrl "sigs.k8s.io/controller-runtime"

"github.com/pkg/errors"
apicorev1 "k8s.io/api/core/v1"
corev1 "k8s.io/api/core/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
"sigs.k8s.io/cluster-api/controllers/noderefutil"
Expand All @@ -42,7 +42,7 @@ var (
)

type getNodeReferencesResult struct {
references []apicorev1.ObjectReference
references []corev1.ObjectReference
available int
ready int
}
Expand Down Expand Up @@ -90,7 +90,7 @@ func (r *MachinePoolReconciler) reconcileNodeRefs(ctx context.Context, cluster *
log.Info("Cannot assign NodeRefs to MachinePool, no matching Nodes")
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}
r.recorder.Event(mp, apicorev1.EventTypeWarning, "FailedSetNodeRef", err.Error())
r.recorder.Event(mp, corev1.EventTypeWarning, "FailedSetNodeRef", err.Error())
return ctrl.Result{}, errors.Wrapf(err, "failed to get node references")
}

Expand All @@ -100,7 +100,7 @@ func (r *MachinePoolReconciler) reconcileNodeRefs(ctx context.Context, cluster *
mp.Status.NodeRefs = nodeRefsResult.references

log.Info("Set MachinePools's NodeRefs", "noderefs", mp.Status.NodeRefs)
r.recorder.Event(mp, apicorev1.EventTypeNormal, "SuccessfulSetNodeRefs", fmt.Sprintf("%+v", mp.Status.NodeRefs))
r.recorder.Event(mp, corev1.EventTypeNormal, "SuccessfulSetNodeRefs", fmt.Sprintf("%+v", mp.Status.NodeRefs))

// Reconcile node annotations.
for _, nodeRef := range nodeRefsResult.references {
Expand Down Expand Up @@ -141,9 +141,9 @@ func (r *MachinePoolReconciler) reconcileNodeRefs(ctx context.Context, cluster *
// deleteRetiredNodes deletes nodes that don't have a corresponding ProviderID in Spec.ProviderIDList.
// A MachinePool infrastructure provider indicates an instance in the set has been deleted by
// removing its ProviderID from the slice.
func (r *MachinePoolReconciler) deleteRetiredNodes(ctx context.Context, c client.Client, nodeRefs []apicorev1.ObjectReference, providerIDList []string) error {
func (r *MachinePoolReconciler) deleteRetiredNodes(ctx context.Context, c client.Client, nodeRefs []corev1.ObjectReference, providerIDList []string) error {
log := ctrl.LoggerFrom(ctx, "providerIDList", len(providerIDList))
nodeRefsMap := make(map[string]*apicorev1.Node, len(nodeRefs))
nodeRefsMap := make(map[string]*corev1.Node, len(nodeRefs))
for _, nodeRef := range nodeRefs {
node := &corev1.Node{}
if err := c.Get(ctx, client.ObjectKey{Name: nodeRef.Name}, node); err != nil {
Expand Down Expand Up @@ -179,8 +179,8 @@ func (r *MachinePoolReconciler) getNodeReferences(ctx context.Context, c client.
log := ctrl.LoggerFrom(ctx, "providerIDList", len(providerIDList))

var ready, available int
nodeRefsMap := make(map[string]apicorev1.Node)
nodeList := apicorev1.NodeList{}
nodeRefsMap := make(map[string]corev1.Node)
nodeList := corev1.NodeList{}
for {
if err := c.List(ctx, &nodeList, client.Continue(nodeList.Continue)); err != nil {
return getNodeReferencesResult{}, errors.Wrapf(err, "failed to List nodes")
Expand All @@ -201,7 +201,7 @@ func (r *MachinePoolReconciler) getNodeReferences(ctx context.Context, c client.
}
}

var nodeRefs []apicorev1.ObjectReference
var nodeRefs []corev1.ObjectReference
for _, providerID := range providerIDList {
pid, err := noderefutil.NewProviderID(providerID)
if err != nil {
Expand All @@ -213,7 +213,7 @@ func (r *MachinePoolReconciler) getNodeReferences(ctx context.Context, c client.
if nodeIsReady(&node) {
ready++
}
nodeRefs = append(nodeRefs, apicorev1.ObjectReference{
nodeRefs = append(nodeRefs, corev1.ObjectReference{
Kind: node.Kind,
APIVersion: node.APIVersion,
Name: node.Name,
Expand All @@ -228,10 +228,10 @@ func (r *MachinePoolReconciler) getNodeReferences(ctx context.Context, c client.
return getNodeReferencesResult{nodeRefs, available, ready}, nil
}

func nodeIsReady(node *apicorev1.Node) bool {
func nodeIsReady(node *corev1.Node) bool {
for _, n := range node.Status.Conditions {
if n.Type == apicorev1.NodeReady {
return n.Status == apicorev1.ConditionTrue
if n.Type == corev1.NodeReady {
return n.Status == corev1.ConditionTrue
}
}
return false
Expand Down
33 changes: 30 additions & 3 deletions exp/controllers/machinepool_controller_phases.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,21 @@ import (
"strings"
"time"

"sigs.k8s.io/cluster-api/util"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/utils/pointer"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
"sigs.k8s.io/cluster-api/controllers/external"
capierrors "sigs.k8s.io/cluster-api/errors"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1alpha4"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/patch"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/source"
Expand Down Expand Up @@ -108,6 +108,33 @@ func (r *MachinePoolReconciler) reconcileExternal(ctx context.Context, cluster *
return external.ReconcileOutput{Paused: true}, nil
}

if strings.HasSuffix(ref.Kind, external.TemplateSuffix) {
owner := &metav1.OwnerReference{
APIVersion: expv1.GroupVersion.String(),
Kind: "MachinePool",
Name: m.Name,
UID: m.UID,
}
Comment on lines +112 to +117
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the owner reference already set below?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the current line 118 there is a call that sets it: `controllerutil.SetControllerReference(m, obj, r.Client.Scheme())`.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I use this owner on line 123, passing it as input to the CloneTemplate func, so I can't remove it.

ref, err = external.CloneTemplate(ctx, &external.CloneTemplateInput{
Client: r.Client,
TemplateRef: ref,
Namespace: m.Namespace,
ClusterName: cluster.Name,
OwnerRef: owner,
})
Comment on lines +118 to +124
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/hold

Wouldn't this code clone the reference every time it reconciles?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my point of view, no: the reference is replaced by the concrete object reference, so on the next reconciliation the code will not enter the if block.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ref, err = external.CloneTemplate assignment would only replace the reference for the current function, am I missing something?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 128 gets the ref object, and this object is returned by the function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify my earlier statement, the MachinePools and MachineDeployments incongruity with KubeadmConfig and KubeadmConfigTemplate respectively caused me some confusion when I first started writing cluster yamls for the AzureMachinePool implementation.

When I initially authored the cluster yamls for AzureMachinePool, I started with a working set of resources from the CAPZ default cluster-template.yaml. I started replacing the MachineDeployment with a MachinePool and kept the KubeadmConfigTemplate. After writing a bit of code, I started testing and found that CAPI MachinePool controller didn't pick up on the KubeadmConfigTemplate, but rather, was looking for a KubeadmConfig. It took a bit of debugging and getting into the internals of the MachinePool controller (I was really new to the project at the time and still trying to wrap my head around the project.)

However much confusion I had previously, I feel better about the distinction today since, as @CecileRobertMichon pointed out, MachinePools reference 1 bootstrap config, not many instances of a config template. That distinction may not make sense to a newcomer to the project. They may try to write MachinePools in a similar manner as a MachineDeployment without regard to the subtle distinction between them.

I don't feel strongly one way or another. Just thought I'd share my early experiences in case it can provide some color.

Copy link
Author

@felipeweb felipeweb Feb 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What @devigned described was exactly what I went through as well. I don't have a use case for this yet, only those pains that @devigned pointed out.

Perhaps we could explain in the developer documentation how and when to use template objects. This is a little confusing for those who are just starting to understand the project and are looking at MachineDeployment and MachinePool.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. In that case, how can we make this subtle distinction very explicit so it's not so confusing for newcomers? I don't think allowing templates and making MachinePool more like MachineDeployments is the answer here, but we should make it clear why and how they are different.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe pictures would tell the story our words have failed to tell.

Documentation showing a visual representation comparing a MachineDeployment and MachinePool would be super helpful. Perhaps, a multi-step visualization starting from initial apply and ending with deployment completed with multiple machines that describes the anatomy, relationships and choreography of the resources the user specified and the ones created by the controller. This documentation could be less developer focused and more user focused; less state diagrams and more context / intent.

@felipeweb do you think that would have helped you?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@devigned sure!

if err != nil {
return external.ReconcileOutput{}, errors.Wrap(err, "failed to clone template")
}
obj, err = external.Get(ctx, r.Client, ref, m.Namespace)
if err != nil {
if apierrors.IsNotFound(errors.Cause(err)) {
return external.ReconcileOutput{}, errors.Wrapf(err, "could not find %v %q for MachinePool %q in namespace %q, requeuing",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be "could not find the MachinePool template"?
Also, for my understanding, what kind of error are we trying to catch by getting the cloned template after it is cloned? If the clone operation passed, the template should be there...

Copy link
Author

@felipeweb felipeweb Jan 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fabriziopandini I may be wrong because I don't have deep knowledge of the cluster-api codebase, but as I understand it, the object to be reconciled must be the concrete object and not the template, so I do a Get after the clone, and if I can't get the concrete object I can't continue the reconciliation.

ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
}
return external.ReconcileOutput{}, err
}
}

// Initialize the patch helper.
patchHelper, err := patch.NewHelper(obj, r.Client)
if err != nil {
Expand Down