From 74306fd76862dd8744ca29f58059907707d061a9 Mon Sep 17 00:00:00 2001 From: Kanha gupta Date: Tue, 30 Apr 2024 03:37:03 +0530 Subject: [PATCH] Pre-installation testing framework Signed-off-by: Kanha gupta --- .github/workflows/kind.yml | 9 +- go.mod | 1 + go.sum | 2 + pkg/antctl/antctl.go | 7 + pkg/antctl/raw/check/cluster/command.go | 222 ++++++++++++++++++ .../cluster/test_checkCNIAvailability.go | 41 ++++ .../test_checkcontrolplaneavailability.go | 41 ++++ .../raw/check/cluster/test_checkk8sversion.go | 31 +++ .../check/cluster/test_checkovsloadable.go | 40 ++++ pkg/antctl/raw/check/installation/command.go | 69 +----- pkg/antctl/raw/check/util.go | 68 ++++++ 11 files changed, 462 insertions(+), 69 deletions(-) create mode 100644 pkg/antctl/raw/check/cluster/command.go create mode 100644 pkg/antctl/raw/check/cluster/test_checkCNIAvailability.go create mode 100644 pkg/antctl/raw/check/cluster/test_checkcontrolplaneavailability.go create mode 100644 pkg/antctl/raw/check/cluster/test_checkk8sversion.go create mode 100644 pkg/antctl/raw/check/cluster/test_checkovsloadable.go diff --git a/.github/workflows/kind.yml b/.github/workflows/kind.yml index 845d5e3a9ca..08bd2934b81 100644 --- a/.github/workflows/kind.yml +++ b/.github/workflows/kind.yml @@ -772,13 +772,16 @@ jobs: - name: Create Kind Cluster run: | kind create cluster --config ci/kind/config-3nodes.yml + - name: Build antctl binary + run: | + make antctl-linux + - name: Run Pre-installation checks + run: | + ./bin/antctl-linux check cluster - name: Load Docker images and deploy Antrea run: | kind load docker-image antrea/antrea-controller-ubuntu-coverage:latest antrea/antrea-agent-ubuntu-coverage:latest kubectl apply -f build/yamls/antrea.yml - - name: Build antctl binary - run: | - make antctl-linux - name: Run antctl command run: | ./bin/antctl-linux check installation diff --git a/go.mod b/go.mod index d6440796f73..dd51184f57b 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/google/btree v1.1.2 github.com/google/uuid v1.6.0 + github.com/hashicorp/go-version v1.6.0 github.com/hashicorp/memberlist v0.5.1 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.3.0 github.com/k8snetworkplumbingwg/sriov-cni v2.1.0+incompatible diff --git a/go.sum b/go.sum index 41c8d242ffc..cf4ad4344e4 100644 --- a/go.sum +++ b/go.sum @@ -413,6 +413,8 @@ github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerX github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek= +github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= diff --git a/pkg/antctl/antctl.go b/pkg/antctl/antctl.go index 2addc2d44e9..946dbb3d53e 100644 --- a/pkg/antctl/antctl.go +++ b/pkg/antctl/antctl.go @@ -19,6 +19,7 @@ import ( agentapis "antrea.io/antrea/pkg/agent/apis" fallbackversion "antrea.io/antrea/pkg/antctl/fallback/version" + checkcluster "antrea.io/antrea/pkg/antctl/raw/check/cluster" checkinstallation "antrea.io/antrea/pkg/antctl/raw/check/installation" "antrea.io/antrea/pkg/antctl/raw/featuregates" "antrea.io/antrea/pkg/antctl/raw/multicluster" @@ -640,6 +641,12 @@ $ antctl get podmulticaststats pod -n namespace`, supportController: false, commandGroup: check, }, + { + cobraCommand: checkcluster.Command(), + supportAgent: false, + supportController: false, + commandGroup: check, + }, { cobraCommand: supportbundle.Command, supportAgent: true, diff --git a/pkg/antctl/raw/check/cluster/command.go b/pkg/antctl/raw/check/cluster/command.go new file mode 100644 index 00000000000..15849e537ab --- /dev/null +++ b/pkg/antctl/raw/check/cluster/command.go @@ -0,0 +1,222 @@ +package cluster + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "antrea.io/antrea/pkg/antctl/raw/check" +) + +func Command() *cobra.Command { + o := newOptions() + command := &cobra.Command{ + Use: "cluster", + Short: "Runs pre installation checks", + RunE: func(cmd *cobra.Command, args []string) error { + return Run(o) + }, + } + command.Flags().StringVarP(&o.antreaNamespace, "Namespace", "n", o.antreaNamespace, "Configure Namespace in which Antrea is running") + return command +} + +type options struct { + antreaNamespace string +} + +func newOptions() *options { + return &options{ + antreaNamespace: "kube-system", + } +} + +const ( + antreaNamespace = "kube-system" + deploymentName = "cluster-check" + podReadyTimeout = 1 * time.Minute +) + +type Test interface { + Run(ctx context.Context, testContext *testContext) error +} + +var testsRegistry = make(map[string]Test) + +func RegisterTest(name string, test Test) { + testsRegistry[name] = test +} + +type testContext struct { + client kubernetes.Interface + config *rest.Config + clusterName string + antreaNamespace string +} + +func Run(o *options) error { + client, config, clusterName, err := check.NewClient() + if err != nil { + return fmt.Errorf("unable to create Kubernetes client: %s", err) + } + ctx := context.Background() + testContext := NewTestContext(client, config, clusterName, o) + if err := testContext.setup(ctx); err != nil { + return err + } + for name, test := range testsRegistry { + testContext.Header("Running test: %s", name) + if err := test.Run(ctx, testContext); err != nil { + testContext.Header("Test %s failed: %s", name, err) + } else { + testContext.Header("Test %s passed", name) + } + } + testContext.Log("Test finished") + testContext.teardown(ctx, deploymentName, antreaNamespace) + return nil +} + +func (t *testContext) setup(ctx context.Context) error { + deployment := check.NewDeployment(check.DeploymentParameters{ + Name: deploymentName, + Image: "alpine", + Replicas: 1, + Command: []string{"sleep", "infinity"}, + Labels: map[string]string{"app": "cluster-check"}, + HostNetwork: true, + VolumeMounts: []corev1.VolumeMount{ + {Name: "cni-conf", MountPath: "/etc/cni/net.d"}, + {Name: "lib-modules", MountPath: "/lib/modules"}, + {Name: "os-info", MountPath: "/etc/os-release"}, + }, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + Operator: "Exists", + Effect: "NoSchedule", + }, + { + Key: "node.kubernetes.io/not-ready", + Operator: "Exists", + Effect: "NoSchedule", + }, + }, + Volumes: []corev1.Volume{ + { + Name: "cni-conf", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/etc/cni/net.d", + }, + }, + }, + { + Name: "lib-modules", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/lib/modules", + Type: hostPathTypePtr("Directory"), + }, + }, + }, + { + Name: "os-info", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/etc/os-release", + Type: hostPathTypePtr("File"), + }, + }, + }, + }, + }) + + t.Log("Creating Deployment") + _, err := t.client.AppsV1().Deployments(antreaNamespace).Create(ctx, deployment, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("unable to create Deployment: %w", err) + } + + t.Log("Waiting for Deployment to become ready") + t.waitForDeploymentsReady(ctx, time.Second, podReadyTimeout, deploymentName) + if err != nil { + return fmt.Errorf("error while waiting for Deployment to become ready: %w", err) + } + return nil +} + +func hostPathTypePtr(s string) *corev1.HostPathType { + v := corev1.HostPathType(s) + return &v +} + +func NewTestContext(client kubernetes.Interface, config *rest.Config, clusterName string, o *options) *testContext { + return &testContext{ + client: client, + config: config, + clusterName: clusterName, + antreaNamespace: o.antreaNamespace, + } +} + +func (t *testContext) teardown(ctx context.Context, deploymentName, namespace string) error { + err := t.client.AppsV1().Deployments(namespace).Delete(ctx, deploymentName, metav1.DeleteOptions{}) + if err != nil { + return err + } + t.Log("Waiting for the deletion of Deployment %s in Namespace %s...", deploymentName, namespace) + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 1*time.Minute, true, func(ctx context.Context) (bool, error) { + _, err := t.client.AppsV1().Deployments(namespace).Get(ctx, deploymentName, metav1.GetOptions{}) + if errors.IsNotFound(err) { + return true, nil + } + if err != nil { + return false, err + } + return false, nil + }) + if err != nil { + return fmt.Errorf("error waiting for Deployment %s to be deleted in Namespace %s: %w", deploymentName, namespace, err) + } + t.Log("Deployment %s successfully deleted from Namespace %s", deploymentName, namespace) + return nil +} + +func (t *testContext) waitForDeploymentsReady(ctx context.Context, interval, timeout time.Duration, deployments ...string) error { + for _, deployment := range deployments { + t.Log("Waiting for Deployment %s to become ready...", deployment) + err := wait.PollUntilContextTimeout(ctx, interval, timeout, false, func(ctx context.Context) (bool, error) { + ready, err := check.DeploymentIsReady(ctx, t.client, t.antreaNamespace, deployment) + if err != nil { + return false, fmt.Errorf("error checking readiness of Deployment %s: %w", deployment, err) + } + return ready, nil + }) + if err != nil { + return fmt.Errorf("waiting for Deployment %s to become ready has been interrupted: %w", deployment, err) + } + t.Log("Deployment %s is ready.", deployment) + } + return nil +} + +func (t *testContext) Log(format string, a ...interface{}) { + fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", t.clusterName)+format+"\n", a...) +} + +func (t *testContext) Header(format string, a ...interface{}) { + t.Log("-------------------------------------------------------------------------------------------") + t.Log(format, a...) + t.Log("-------------------------------------------------------------------------------------------") +} diff --git a/pkg/antctl/raw/check/cluster/test_checkCNIAvailability.go b/pkg/antctl/raw/check/cluster/test_checkCNIAvailability.go new file mode 100644 index 00000000000..4a1e07f93fb --- /dev/null +++ b/pkg/antctl/raw/check/cluster/test_checkCNIAvailability.go @@ -0,0 +1,41 @@ +package cluster + +import ( + "context" + "fmt" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "antrea.io/antrea/pkg/antctl/raw/check" +) + +type checkCNIAvailability struct{} + +func init() { + RegisterTest("Check if another CNI is Present", &checkCNIAvailability{}) +} + +func (t *checkCNIAvailability) Run(ctx context.Context, testContext *testContext) error { + pods, err := testContext.client.CoreV1().Pods(antreaNamespace).List(ctx, metav1.ListOptions{LabelSelector: "app=cluster-check"}) + if err != nil { + return fmt.Errorf("failed to list Pods: %v", err) + } + for _, pod := range pods.Items { + testContext.Log("Checking if CNI is present in Pod: %s", pod.Name) + command := []string{"ls", "/etc/cni/net.d"} + output, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, antreaNamespace, pod.Name, "", command) + if err != nil { + testContext.Log("failed to execute command in pod: %s, error: %v", pod.Name, err) + continue + } + outputStr := strings.TrimSpace(output) + if outputStr == "" { + testContext.Log("No files present in /etc/cni/net.d in pod: %s", pod.Name) + return nil + } else { + return fmt.Errorf("error: files found in /host/etc/cni/net.d in pod: %s", outputStr) + } + } + return nil +} diff --git a/pkg/antctl/raw/check/cluster/test_checkcontrolplaneavailability.go b/pkg/antctl/raw/check/cluster/test_checkcontrolplaneavailability.go new file mode 100644 index 00000000000..e055d9cd2e8 --- /dev/null +++ b/pkg/antctl/raw/check/cluster/test_checkcontrolplaneavailability.go @@ -0,0 +1,41 @@ +package cluster + +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type checkControlPlaneAvailability struct{} + +func init() { + RegisterTest("Check Control Plane Availability", &checkControlPlaneAvailability{}) +} + +func (t *checkControlPlaneAvailability) Run(ctx context.Context, testContext *testContext) error { + labelSelector := "component=kube-controller-manager,tier=control-plane" + namespaces, err := testContext.client.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list Namespaces: %w", err) + } + controlPlaneFound := false + for _, namespace := range namespaces.Items { + pods, err := testContext.client.CoreV1().Pods(namespace.Name).List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) + if err != nil { + return fmt.Errorf("failed to list Pods in Namespace %s: %w", namespace.Name, err) + } + if len(pods.Items) > 0 { + controlPlaneFound = true + for _, pod := range pods.Items { + testContext.Log("Control plane Pod %s found in Namespace %s\n", pod.Name, namespace.Name) + } + } + } + if !controlPlaneFound { + testContext.Log("Warning: No control plane Pods found in any Namespace.") + } else { + testContext.Log("Control plane Pods found in one or more Namespaces.") + } + return nil +} diff --git a/pkg/antctl/raw/check/cluster/test_checkk8sversion.go b/pkg/antctl/raw/check/cluster/test_checkk8sversion.go new file mode 100644 index 00000000000..649e8c6e49e --- /dev/null +++ b/pkg/antctl/raw/check/cluster/test_checkk8sversion.go @@ -0,0 +1,31 @@ +package cluster + +import ( + "context" + "fmt" + "strings" + + "github.com/hashicorp/go-version" +) + +type checkK8sVersion struct{} + +func init() { + RegisterTest("Check K8s Version", &checkK8sVersion{}) +} + +func (t *checkK8sVersion) Run(ctx context.Context, testContext *testContext) error { + discoveryClient := testContext.client.Discovery() + serverVersion, err := discoveryClient.ServerVersion() + if err != nil { + return fmt.Errorf("error getting server version: %v", err) + } + currentVersion, err := version.NewVersion(strings.TrimPrefix(serverVersion.GitVersion, "v")) + minVersion, _ := version.NewVersion("1.19") + if currentVersion.GreaterThanOrEqual(minVersion) { + testContext.Log("Kubernetes server version is compatible with Antrea. version : %s", serverVersion.GitVersion) + } else { + testContext.Log("Kubernetes min version required : 1.19") + } + return err +} diff --git a/pkg/antctl/raw/check/cluster/test_checkovsloadable.go b/pkg/antctl/raw/check/cluster/test_checkovsloadable.go new file mode 100644 index 00000000000..98ba8320bbf --- /dev/null +++ b/pkg/antctl/raw/check/cluster/test_checkovsloadable.go @@ -0,0 +1,40 @@ +package cluster + +import ( + "context" + "fmt" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "antrea.io/antrea/pkg/antctl/raw/check" +) + +type checkOVSLoadable struct{} + +func init() { + RegisterTest("Check if Openvswitch is Loadable", &checkOVSLoadable{}) +} + +func (c *checkOVSLoadable) Run(ctx context.Context, testContext *testContext) error { + pods, err := testContext.client.CoreV1().Pods(antreaNamespace).List(ctx, metav1.ListOptions{LabelSelector: "name=cluster-check"}) + if err != nil { + testContext.Log("Failed to list pods: %v", err) + return fmt.Errorf("failed to list Pods: %v", err) + } + for _, pod := range pods.Items { + cmd := []string{"modprobe", "openvswitch"} + _, stderr, err := check.ExecInPod(ctx, testContext.client, testContext.config, antreaNamespace, pod.Name, "", cmd) + if err != nil { + if strings.Contains(stderr, "not found") { + testContext.Log("Open vSwitch kernel module is not loadable in Pod %s: %s", pod.Name, stderr) + } else { + testContext.Log("Error executing modprobe in Pod %s: %s", pod.Name, stderr) + return fmt.Errorf("error executing modprobe for openvswitch in Pod %s: %s", pod.Name, err) + } + } else { + testContext.Log("Open vSwitch kernel module loaded successfully in Pod %s", pod.Name) + } + } + return nil +} diff --git a/pkg/antctl/raw/check/installation/command.go b/pkg/antctl/raw/check/installation/command.go index 405f7b13f04..f83fdd1208c 100644 --- a/pkg/antctl/raw/check/installation/command.go +++ b/pkg/antctl/raw/check/installation/command.go @@ -23,7 +23,6 @@ import ( "time" "github.com/spf13/cobra" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -132,68 +131,6 @@ func newService(name string, selector map[string]string, port int) *corev1.Servi } } -type deploymentParameters struct { - Name string - Role string - Image string - Replicas int - Port int - Command []string - Affinity *corev1.Affinity - Tolerations []corev1.Toleration - Labels map[string]string -} - -func newDeployment(p deploymentParameters) *appsv1.Deployment { - if p.Replicas == 0 { - p.Replicas = 1 - } - replicas32 := int32(p.Replicas) - labels := map[string]string{ - "name": p.Name, - "kind": p.Role, - } - return &appsv1.Deployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: p.Name, - Labels: labels, - }, - Spec: appsv1.DeploymentSpec{ - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: p.Name, - Labels: labels, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: p.Name, - Env: []corev1.EnvVar{ - {Name: "PORT", Value: fmt.Sprintf("%d", p.Port)}, - }, - Ports: []corev1.ContainerPort{ - {ContainerPort: int32(p.Port)}, - }, - Image: p.Image, - ImagePullPolicy: corev1.PullIfNotPresent, - Command: p.Command, - }, - }, - Affinity: p.Affinity, - Tolerations: p.Tolerations, - }, - }, - Replicas: &replicas32, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": p.Name, - "kind": p.Role, - }, - }, - }, - } -} - func NewTestContext(client kubernetes.Interface, config *rest.Config, clusterName string, o *options) *testContext { return &testContext{ client: client, @@ -259,7 +196,7 @@ func (t *testContext) setup(ctx context.Context) error { Effect: "NoSchedule", }, } - echoDeployment := newDeployment(deploymentParameters{ + echoDeployment := check.NewDeployment(check.DeploymentParameters{ Name: echoSameNodeDeploymentName, Role: kindEchoName, Port: 80, @@ -291,7 +228,7 @@ func (t *testContext) setup(ctx context.Context) error { return fmt.Errorf("unable to create Deployment %s: %s", echoSameNodeDeploymentName, err) } t.Log("Deploying client Deployment %s...", clientDeploymentName) - clientDeployment := newDeployment(deploymentParameters{ + clientDeployment := check.NewDeployment(check.DeploymentParameters{ Name: clientDeploymentName, Role: kindClientName, Image: deploymentImage, @@ -311,7 +248,7 @@ func (t *testContext) setup(ctx context.Context) error { if err != nil { return err } - echoOtherNodeDeployment := newDeployment(deploymentParameters{ + echoOtherNodeDeployment := check.NewDeployment(check.DeploymentParameters{ Name: echoOtherNodeDeploymentName, Role: kindEchoName, Port: 80, diff --git a/pkg/antctl/raw/check/util.go b/pkg/antctl/raw/check/util.go index 2a6e29936e6..a1dc4cf2a0b 100644 --- a/pkg/antctl/raw/check/util.go +++ b/pkg/antctl/raw/check/util.go @@ -103,3 +103,71 @@ func ExecInPod(ctx context.Context, client kubernetes.Interface, config *rest.Co } return stdout.String(), stderr.String(), nil } + +func NewDeployment(p DeploymentParameters) *appsv1.Deployment { + if p.Replicas == 0 { + p.Replicas = 1 + } + replicas32 := int32(p.Replicas) + labels := map[string]string{ + "name": p.Name, + "kind": p.Role, + } + var ports []corev1.ContainerPort + if p.Port > 0 { + ports = append(ports, corev1.ContainerPort{ContainerPort: int32(p.Port)}) + } + var env []corev1.EnvVar + if p.Port > 0 { + env = append(env, corev1.EnvVar{Name: "PORT", Value: fmt.Sprintf("%d", p.Port)}) + } + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: p.Name, + Labels: labels, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas32, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + }, + Spec: corev1.PodSpec{ + HostNetwork: p.HostNetwork, + Containers: []corev1.Container{ + { + Name: p.Name, + Image: p.Image, + Ports: ports, + Env: env, + ImagePullPolicy: corev1.PullIfNotPresent, + Command: p.Command, + VolumeMounts: p.VolumeMounts, + }, + }, + Tolerations: p.Tolerations, + Volumes: p.Volumes, + Affinity: p.Affinity, + }, + }, + }, + } +} + +type DeploymentParameters struct { + Name string + Role string + Image string + Replicas int + Port int + Command []string + Affinity *corev1.Affinity + Tolerations []corev1.Toleration + Labels map[string]string + VolumeMounts []corev1.VolumeMount + Volumes []corev1.Volume + HostNetwork bool +}