Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect, snapshot and gracefully restore global objects in e2e test #2091

Merged
merged 1 commit into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 42 additions & 32 deletions pkg/cmd/tests/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/scylladb/scylla-operator/test/e2e/framework"
"github.com/spf13/cobra"
apierrors "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/rest"
)

Expand Down Expand Up @@ -42,26 +43,26 @@ var supportedBroadcastAddressTypes = []scyllav1.BroadcastAddressType{
type TestFrameworkOptions struct {
genericclioptions.ClientConfigSet

ArtifactsDir string
DeleteTestingNSPolicyUntyped string
DeleteTestingNSPolicy framework.DeleteTestingNSPolicyType
IngressController *IngressControllerOptions
ScyllaClusterOptionsUntyped *ScyllaClusterOptions
scyllaClusterOptions *framework.ScyllaClusterOptions
ObjectStorageBucket string
GCSServiceAccountKeyPath string
S3CredentialsFilePath string
objectStorageType framework.ObjectStorageType
gcsServiceAccountKey []byte
s3CredentialsFile []byte
ArtifactsDir string
CleanupPolicyUntyped string
CleanupPolicy framework.CleanupPolicyType
IngressController *IngressControllerOptions
ScyllaClusterOptionsUntyped *ScyllaClusterOptions
scyllaClusterOptions *framework.ScyllaClusterOptions
ObjectStorageBucket string
GCSServiceAccountKeyPath string
S3CredentialsFilePath string
objectStorageType framework.ObjectStorageType
gcsServiceAccountKey []byte
s3CredentialsFile []byte
}

func NewTestFrameworkOptions(streams genericclioptions.IOStreams, userAgent string) *TestFrameworkOptions {
return &TestFrameworkOptions{
ClientConfigSet: genericclioptions.NewClientConfigSet(userAgent),
ArtifactsDir: "",
DeleteTestingNSPolicyUntyped: string(framework.DeleteTestingNSPolicyAlways),
IngressController: &IngressControllerOptions{},
ClientConfigSet: genericclioptions.NewClientConfigSet(userAgent),
ArtifactsDir: "",
CleanupPolicyUntyped: string(framework.CleanupPolicyAlways),
IngressController: &IngressControllerOptions{},
ScyllaClusterOptionsUntyped: &ScyllaClusterOptions{
NodeServiceType: string(scyllav1.NodeServiceTypeHeadless),
NodesBroadcastAddressType: string(scyllav1.BroadcastAddressTypePodIP),
Expand All @@ -81,11 +82,20 @@ func (o *TestFrameworkOptions) AddFlags(cmd *cobra.Command) {
o.ClientConfigSet.AddFlags(cmd)

cmd.PersistentFlags().StringVarP(&o.ArtifactsDir, "artifacts-dir", "", o.ArtifactsDir, "A directory for storing test artifacts. No data is collected until set.")
cmd.PersistentFlags().StringVarP(&o.DeleteTestingNSPolicyUntyped, "delete-namespace-policy", "", o.DeleteTestingNSPolicyUntyped, fmt.Sprintf("Namespace deletion policy. Allowed values are [%s].", strings.Join(
cmd.PersistentFlags().StringVarP(&o.CleanupPolicyUntyped, "delete-namespace-policy", "", o.CleanupPolicyUntyped, fmt.Sprintf("Namespace deletion policy. Allowed values are [%s].", strings.Join(
[]string{
string(framework.DeleteTestingNSPolicyAlways),
string(framework.DeleteTestingNSPolicyNever),
string(framework.DeleteTestingNSPolicyOnSuccess),
string(framework.CleanupPolicyAlways),
string(framework.CleanupPolicyNever),
string(framework.CleanupPolicyOnSuccess),
},
", ",
)))
utilruntime.Must(cmd.PersistentFlags().MarkDeprecated("delete-namespace-policy", "--delete-namespace-policy is deprecated - please use --cleanup-policy instead"))
cmd.PersistentFlags().StringVarP(&o.CleanupPolicyUntyped, "cleanup-policy", "", o.CleanupPolicyUntyped, fmt.Sprintf("Cleanup policy. Allowed values are [%s].", strings.Join(
[]string{
string(framework.CleanupPolicyAlways),
string(framework.CleanupPolicyNever),
string(framework.CleanupPolicyOnSuccess),
},
", ",
)))
Expand Down Expand Up @@ -118,10 +128,10 @@ func (o *TestFrameworkOptions) Validate(args []string) error {
errors = append(errors, err)
}

switch p := framework.DeleteTestingNSPolicyType(o.DeleteTestingNSPolicyUntyped); p {
case framework.DeleteTestingNSPolicyAlways,
framework.DeleteTestingNSPolicyOnSuccess,
framework.DeleteTestingNSPolicyNever:
switch p := framework.CleanupPolicyType(o.CleanupPolicyUntyped); p {
case framework.CleanupPolicyAlways,
framework.CleanupPolicyOnSuccess,
framework.CleanupPolicyNever:
default:
errors = append(errors, fmt.Errorf("invalid DeleteTestingNSPolicy: %q", p))
}
Expand Down Expand Up @@ -174,7 +184,7 @@ func (o *TestFrameworkOptions) Complete(args []string) error {
return err
}

o.DeleteTestingNSPolicy = framework.DeleteTestingNSPolicyType(o.DeleteTestingNSPolicyUntyped)
o.CleanupPolicy = framework.CleanupPolicyType(o.CleanupPolicyUntyped)

// Trim spaces so we can reason later if the dir is set or not
o.ArtifactsDir = strings.TrimSpace(o.ArtifactsDir)
Expand Down Expand Up @@ -216,13 +226,13 @@ func (o *TestFrameworkOptions) Complete(args []string) error {
RestConfigs: slices.ConvertSlice(o.ClientConfigs, func(cc genericclioptions.ClientConfig) *rest.Config {
return cc.RestConfig
}),
ArtifactsDir: o.ArtifactsDir,
DeleteTestingNSPolicy: o.DeleteTestingNSPolicy,
ScyllaClusterOptions: o.scyllaClusterOptions,
ObjectStorageType: o.objectStorageType,
ObjectStorageBucket: o.ObjectStorageBucket,
GCSServiceAccountKey: o.gcsServiceAccountKey,
S3CredentialsFile: o.s3CredentialsFile,
ArtifactsDir: o.ArtifactsDir,
CleanupPolicy: o.CleanupPolicy,
ScyllaClusterOptions: o.scyllaClusterOptions,
ObjectStorageType: o.objectStorageType,
ObjectStorageBucket: o.ObjectStorageBucket,
GCSServiceAccountKey: o.gcsServiceAccountKey,
S3CredentialsFile: o.s3CredentialsFile,
}

if o.IngressController != nil {
Expand Down
8 changes: 3 additions & 5 deletions pkg/naming/names.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,13 @@ import (
)

func ManualRef(namespace, name string) string {
if len(namespace) == 0 {
return name
}
return fmt.Sprintf("%s/%s", namespace, name)
}

func ObjRef(obj metav1.Object) string {
namespace := obj.GetNamespace()
if len(namespace) == 0 {
return obj.GetName()
}

return ManualRef(obj.GetNamespace(), obj.GetName())
}

Expand Down
231 changes: 231 additions & 0 deletions test/e2e/framework/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
package framework

import (
"context"
"fmt"
"path/filepath"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
"github.com/scylladb/scylla-operator/pkg/gather/collect"
"github.com/scylladb/scylla-operator/pkg/naming"
"github.com/scylladb/scylla-operator/pkg/pointer"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
cacheddiscovery "k8s.io/client-go/discovery/cached/memory"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
)

type CleanupInterface interface {
CollectToLog(ctx context.Context)
Collect(ctx context.Context, artifactsDir string, ginkgoNamespace string)
Cleanup(ctx context.Context)
}

type NamespaceCleaner struct {
Client kubernetes.Interface
DynamicClient dynamic.Interface
NS *corev1.Namespace
}

var _ CleanupInterface = &NamespaceCleaner{}

func (nc *NamespaceCleaner) CollectToLog(ctx context.Context) {
// Log events if the test failed.
if g.CurrentSpecReport().Failed() {
By(fmt.Sprintf("Collecting events from namespace %q.", nc.NS.Name))
DumpEventsInNamespace(ctx, nc.Client, nc.NS.Name)
}
}

func (nc *NamespaceCleaner) Collect(ctx context.Context, artifactsDir string, _ string) {
By(fmt.Sprintf("Collecting dumps from namespace %q.", nc.NS.Name))

err := DumpNamespace(ctx, cacheddiscovery.NewMemCacheClient(nc.Client.Discovery()), nc.DynamicClient, nc.Client.CoreV1(), artifactsDir, nc.NS.Name)
o.Expect(err).NotTo(o.HaveOccurred())
}

func (nc *NamespaceCleaner) Cleanup(ctx context.Context) {
By("Destroying namespace %q.", nc.NS.Name)
err := nc.Client.CoreV1().Namespaces().Delete(
ctx,
nc.NS.Name,
metav1.DeleteOptions{
GracePeriodSeconds: pointer.Ptr[int64](0),
PropagationPolicy: pointer.Ptr(metav1.DeletePropagationForeground),
Preconditions: &metav1.Preconditions{
UID: &nc.NS.UID,
},
},
)
o.Expect(err).NotTo(o.HaveOccurred())

// We have deleted only the namespace object, but it can still be there with deletionTimestamp set.
By("Waiting for namespace %q to be removed.", nc.NS.Name)
err = WaitForObjectDeletion(ctx, nc.DynamicClient, corev1.SchemeGroupVersion.WithResource("namespaces"), "", nc.NS.Name, &nc.NS.UID)
o.Expect(err).NotTo(o.HaveOccurred())
klog.InfoS("Namespace removed.", "Namespace", nc.NS.Name)
}

type RestoreStrategy string

const (
RestoreStrategyRecreate RestoreStrategy = "Recreate"
RestoreStrategyUpdate RestoreStrategy = "Update"
)

type RestoringCleaner struct {
client kubernetes.Interface
dynamicClient dynamic.Interface
resourceInfo collect.ResourceInfo
object *unstructured.Unstructured
strategy RestoreStrategy
}

var _ CleanupInterface = &RestoringCleaner{}

func NewRestoringCleaner(ctx context.Context, client kubernetes.Interface, dynamicClient dynamic.Interface, resourceInfo collect.ResourceInfo, namespace string, name string, strategy RestoreStrategy) *RestoringCleaner {
g.By(fmt.Sprintf("Snapshotting object %s %q", resourceInfo.Resource, naming.ManualRef(namespace, name)))

if resourceInfo.Scope.Name() == meta.RESTScopeNameNamespace {
o.Expect(namespace).NotTo(o.BeEmpty())
}

obj, err := dynamicClient.Resource(resourceInfo.Resource).Namespace(namespace).Get(ctx, name, metav1.GetOptions{})
if apierrors.IsNotFound(err) {
zimnx marked this conversation as resolved.
Show resolved Hide resolved
klog.InfoS("No existing object found", "GVR", resourceInfo.Resource, "Instance", naming.ManualRef(namespace, name))
obj = &unstructured.Unstructured{
Object: map[string]interface{}{},
}
obj.SetNamespace(namespace)
obj.SetName(name)
obj.SetUID("")
} else {
o.Expect(err).NotTo(o.HaveOccurred())
klog.InfoS("Snapshotted object", "GVR", resourceInfo.Resource, "Instance", naming.ManualRef(namespace, name), "UID", obj.GetUID())
}

return &RestoringCleaner{
client: client,
dynamicClient: dynamicClient,
resourceInfo: resourceInfo,
object: obj,
strategy: strategy,
}
}

func (rc *RestoringCleaner) getCleansedObject() *unstructured.Unstructured {
obj := rc.object.DeepCopy()
obj.SetResourceVersion("")
obj.SetUID("")
obj.SetCreationTimestamp(metav1.Time{})
obj.SetDeletionTimestamp(nil)
return obj
}

func (rc *RestoringCleaner) CollectToLog(ctx context.Context) {}

func (rc *RestoringCleaner) Collect(ctx context.Context, clusterArtifactsDir string, ginkgoNamespace string) {
artifactsDir := clusterArtifactsDir
if len(artifactsDir) != 0 && rc.resourceInfo.Scope.Name() == meta.RESTScopeNameRoot {
// We have to prevent global object dumps being overwritten with each "It" block.
artifactsDir = filepath.Join(artifactsDir, "cluster-scoped-per-ns", ginkgoNamespace)
}

By(fmt.Sprintf("Collecting global %s %q for namespace %q.", rc.resourceInfo.Resource, naming.ObjRef(rc.object), ginkgoNamespace))

err := DumpResource(
ctx,
rc.client.Discovery(),
rc.dynamicClient,
rc.client.CoreV1(),
artifactsDir,
&rc.resourceInfo,
rc.object.GetNamespace(),
rc.object.GetName(),
)
if apierrors.IsNotFound(err) {
klog.V(2).InfoS("Skipping object collection because it no longer exists", "Ref", naming.ObjRef(rc.object), "Resource", rc.resourceInfo.Resource)
} else {
o.Expect(err).NotTo(o.HaveOccurred())
}
}

func (rc *RestoringCleaner) DeleteObject(ctx context.Context, ignoreNotFound bool) {
By("Deleting object %s %q.", rc.resourceInfo.Resource, naming.ObjRef(rc.object))
err := rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Delete(
ctx,
rc.object.GetName(),
metav1.DeleteOptions{
GracePeriodSeconds: pointer.Ptr[int64](0),
PropagationPolicy: pointer.Ptr(metav1.DeletePropagationForeground),
},
)
if apierrors.IsNotFound(err) && ignoreNotFound {
return
}
o.Expect(err).NotTo(o.HaveOccurred())

// We have deleted only the object, but it can still be there with deletionTimestamp set.
By("Waiting for object %s %q to be removed.", rc.resourceInfo.Resource, naming.ObjRef(rc.object))
err = WaitForObjectDeletion(ctx, rc.dynamicClient, rc.resourceInfo.Resource, rc.object.GetNamespace(), rc.object.GetName(), nil)
o.Expect(err).NotTo(o.HaveOccurred())
By("Object %s %q has been removed.", rc.resourceInfo.Resource, naming.ObjRef(rc.object))
}

func (rc *RestoringCleaner) recreateObject(ctx context.Context) {
o.Expect(rc.object.GetUID()).NotTo(o.BeEmpty())

rc.DeleteObject(ctx, true)

_, err := rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Create(ctx, rc.getCleansedObject(), metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred())
}

func (rc *RestoringCleaner) replaceObject(ctx context.Context) {
o.Expect(rc.object.GetUID()).NotTo(o.BeEmpty())
var err error
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
var freshObj *unstructured.Unstructured
freshObj, err = rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Get(ctx, rc.object.GetName(), metav1.GetOptions{})
if apierrors.IsNotFound(err) {
_, err = rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(rc.object.GetNamespace()).Create(ctx, rc.getCleansedObject(), metav1.CreateOptions{})
return err
}

obj := rc.getCleansedObject()
obj.SetResourceVersion(freshObj.GetResourceVersion())

o.Expect(err).NotTo(o.HaveOccurred())
_, err = rc.dynamicClient.Resource(rc.resourceInfo.Resource).Namespace(obj.GetNamespace()).Update(ctx, obj, metav1.UpdateOptions{})
return err
})
o.Expect(err).NotTo(o.HaveOccurred())
}

func (rc *RestoringCleaner) restoreObject(ctx context.Context) {
By("Restoring original object %s %q.", rc.resourceInfo.Resource, naming.ObjRef(rc.object))
switch rc.strategy {
case RestoreStrategyRecreate:
rc.recreateObject(ctx)
case RestoreStrategyUpdate:
rc.replaceObject(ctx)
default:
g.Fail(fmt.Sprintf("unexpected strategy %q", rc.strategy))
}
}

func (rc *RestoringCleaner) Cleanup(ctx context.Context) {
if len(rc.object.GetUID()) == 0 {
rc.DeleteObject(ctx, true)
return
}

rc.restoreObject(ctx)
}
Loading