Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not restart cluster when scaling up #4964

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 80 additions & 51 deletions src/go/k8s/controllers/redpanda/cluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,21 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

var _ = Describe("RedPandaCluster controller", func() {
const (
timeout = time.Second * 30
interval = time.Second * 1

adminPort = 9644
kafkaPort = 9092
pandaProxyPort = 8082
schemaRegistryPort = 8081
redpandaConfigurationFile = "redpanda.yaml"
replicas = 1
redpandaContainerTag = "x"
redpandaContainerImage = "vectorized/redpanda"
)
const (
timeout = time.Second * 30
interval = time.Second * 1

adminPort = 9644
kafkaPort = 9092
pandaProxyPort = 8082
schemaRegistryPort = 8081
redpandaConfigurationFile = "redpanda.yaml"
replicas = 1
redpandaContainerTag = "x"
redpandaContainerImage = "vectorized/redpanda"
)

var _ = Describe("RedPandaCluster controller", func() {
Context("When creating RedpandaCluster", func() {
It("Should create Redpanda cluster with corresponding resources", func() {
resourceRedpanda := corev1.ResourceList{
Expand Down Expand Up @@ -259,48 +259,13 @@ var _ = Describe("RedPandaCluster controller", func() {
}, timeout, interval).Should(BeTrue())
})
It("creates redpanda cluster with tls enabled", func() {
resources := corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
}

key := types.NamespacedName{
Name: "redpanda-test-tls",
Namespace: "default",
}
redpandaCluster := &v1alpha1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: key.Name,
Namespace: key.Namespace,
},
Spec: v1alpha1.ClusterSpec{
Image: redpandaContainerImage,
Version: redpandaContainerTag,
Replicas: pointer.Int32Ptr(replicas),
Configuration: v1alpha1.RedpandaConfig{
KafkaAPI: []v1alpha1.KafkaAPI{
{
Port: kafkaPort,
TLS: v1alpha1.KafkaAPITLS{Enabled: true, RequireClientAuth: true},
},
},
AdminAPI: []v1alpha1.AdminAPI{{Port: adminPort}},
},
Resources: v1alpha1.RedpandaResourceRequirements{
ResourceRequirements: corev1.ResourceRequirements{
Limits: resources,
Requests: resources,
},
Redpanda: nil,
},
},
}
redpandaCluster := rpCluster()
Expect(k8sClient.Create(context.Background(), redpandaCluster)).Should(Succeed())

By("Creating StatefulSet")
var sts appsv1.StatefulSet
Eventually(func() bool {
err := k8sClient.Get(context.Background(), key, &sts)
err := k8sClient.Get(context.Background(), types.NamespacedName{Name: redpandaCluster.Name, Namespace: redpandaCluster.Namespace}, &sts)
return err == nil &&
*sts.Spec.Replicas == replicas
}, timeout, interval).Should(BeTrue())
Expand Down Expand Up @@ -690,6 +655,31 @@ var _ = Describe("RedPandaCluster controller", func() {
rc.Status.Nodes.ExternalBootstrap != nil
}, timeout, interval).Should(BeTrue())
})
It("does not trigger restart when scaling up", func() {
redpandaCluster := rpCluster()
redpandaCluster.Name = "no-restart-cluster"
key := types.NamespacedName{Name: redpandaCluster.Name, Namespace: redpandaCluster.Namespace}
Expect(k8sClient.Create(context.Background(), redpandaCluster)).Should(Succeed())

By("Creating StatefulSet")
var sts appsv1.StatefulSet
Eventually(func() bool {
err := k8sClient.Get(context.Background(), key, &sts)
return err == nil &&
*sts.Spec.Replicas == replicas
}, timeout, interval).Should(BeTrue())

// configmap annotation should not change when scaling up replicas
// to avoid unnecessary cluster restart
configMapHash := sts.Annotations[res.ConfigMapHashAnnotationKey]
var existingCluster v1alpha1.Cluster
Expect(k8sClient.Get(context.Background(), key, &existingCluster)).Should(Succeed())
var repliasP1 int32 = replicas + 1
existingCluster.Spec.Replicas = &repliasP1
Expect(k8sClient.Update(context.Background(), &existingCluster)).Should(Succeed())
newConfigMapHash := sts.Annotations[res.ConfigMapHashAnnotationKey]
Expect(newConfigMapHash).Should(Equal(configMapHash))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better using Consistently(func ...) in this case, even if it takes more time

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also the sts is not reloaded

})
})

Context("Calling reconcile", func() {
Expand Down Expand Up @@ -737,6 +727,45 @@ var _ = Describe("RedPandaCluster controller", func() {
Entry("Random image pull policy", "asdvasd", Not(Succeed())))
})

// rpCluster returns a fresh Cluster object shared by the controller tests:
// it is named "redpanda-test-tls" in the "default" namespace, exposes a Kafka
// listener with TLS and required client authentication, an admin API
// listener, and identical resource limits and requests.
// Callers may override fields (for example the name) before creating it.
func rpCluster() *v1alpha1.Cluster {
	// The same quantities are used for both limits and requests.
	cpuAndMemory := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("1"),
		corev1.ResourceMemory: resource.MustParse("2Gi"),
	}

	tlsKafkaListener := v1alpha1.KafkaAPI{
		Port: kafkaPort,
		TLS:  v1alpha1.KafkaAPITLS{Enabled: true, RequireClientAuth: true},
	}

	spec := v1alpha1.ClusterSpec{
		Image:    redpandaContainerImage,
		Version:  redpandaContainerTag,
		Replicas: pointer.Int32Ptr(replicas),
		Configuration: v1alpha1.RedpandaConfig{
			KafkaAPI: []v1alpha1.KafkaAPI{tlsKafkaListener},
			AdminAPI: []v1alpha1.AdminAPI{{Port: adminPort}},
		},
		Resources: v1alpha1.RedpandaResourceRequirements{
			ResourceRequirements: corev1.ResourceRequirements{
				Limits:   cpuAndMemory,
				Requests: cpuAndMemory,
			},
			Redpanda: nil,
		},
	}

	return &v1alpha1.Cluster{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "redpanda-test-tls",
			Namespace: "default",
		},
		Spec: spec,
	}
}

func findPort(ports []corev1.ServicePort, name string) int32 {
for _, port := range ports {
if port.Name == name {
Expand Down
19 changes: 5 additions & 14 deletions src/go/k8s/pkg/resources/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ package resources
import (
"bytes"
"context"
"crypto/md5" // nolint:gosec // this is not encrypting secure info
"crypto/rand"
"encoding/json"
"errors"
Expand Down Expand Up @@ -645,24 +644,16 @@ func generatePassword(length int) (string, error) {
func (r *ConfigMapResource) GetNodeConfigHash(
ctx context.Context,
) (string, error) {
var configString string
cfg, err := r.CreateConfiguration(ctx)
if err != nil {
return "", err
}
if featuregates.CentralizedConfiguration(r.pandaCluster.Spec.Version) {
cfg, err := r.CreateConfiguration(ctx)
if err != nil {
return "", err
}
return cfg.GetNodeConfigurationHash()
}

// Previous behavior for v21.x
obj, err := r.obj(ctx)
if err != nil {
return "", err
}
configMap := obj.(*corev1.ConfigMap)
configString = configMap.Data[configKey]
md5Hash := md5.Sum([]byte(configString)) // nolint:gosec // this is not encrypting secure info
return fmt.Sprintf("%x", md5Hash), nil
return cfg.GetAllConfigurationHash()
}

// globalConfigurationChanged verifies if the new global configuration
Expand Down
6 changes: 3 additions & 3 deletions src/go/k8s/pkg/resources/configmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func TestEnsureConfigMap_AdditionalConfig(t *testing.T) {
name: "Primitive object in additional configuration",
additionalConfiguration: map[string]string{"redpanda.transactional_id_expiration_ms": "25920000000"},
expectedStrings: []string{"transactional_id_expiration_ms: 25920000000"},
expectedHash: "0cb36f0be0d64032a61eb51a5d2985ea",
expectedHash: "66339723e4a05fd6ddf0c69a1c21ef50",
},
{
name: "Complex struct in additional configuration",
Expand All @@ -114,15 +114,15 @@ func TestEnsureConfigMap_AdditionalConfig(t *testing.T) {
- address: 0.0.0.0
port: 8081
name: external`},
expectedHash: "4697714fe9b8f8bcaebb814b93f2b8f6",
expectedHash: "13f15bd68fe224846f532c29a10eb3b0",
},
{
name: "shadow index cache directory",
expectedStrings: []string{
`cloud_storage_cache_directory: /var/lib/shadow-index-cache`,
`cloud_storage_cache_size: "10737418240"`,
},
expectedHash: "2f51e71fa4b673fb105f98cb09cb7a00",
expectedHash: "27a43c846e6c990e60307fb3b88f91c4",
},
}
for _, tc := range testcases {
Expand Down
20 changes: 20 additions & 0 deletions src/go/k8s/pkg/resources/configuration/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ func (c *GlobalConfiguration) GetNodeConfigurationHash() (string, error) {
clone := *c
// clean any cluster property from config before serializing
clone.ClusterConfiguration = nil
removeIgnoredFields(&clone)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think probably we don't need to clone the object, but the code above seems to suggest we do a deep copy, while in practice it's doing a shallow one and the object is probably being modified

props := clone.NodeConfiguration.Redpanda.Other
clone.NodeConfiguration.Redpanda.Other = make(map[string]interface{})
for k, v := range props {
Expand All @@ -135,6 +136,25 @@ func (c *GlobalConfiguration) GetNodeConfigurationHash() (string, error) {
return fmt.Sprintf("%x", md5Hash), nil
}

// GetAllConfigurationHash computes an MD5 hash of the whole serialized
// redpanda configuration file. This was the default behavior before the
// centralized configuration feature was developed.
//
// Fields cleared by removeIgnoredFields are excluded from the hash, and the
// work is done on a copy of the receiver struct.
// It returns the hex-encoded hash, or the error reported by Serialize.
func (c *GlobalConfiguration) GetAllConfigurationHash() (string, error) {
	hashed := *c
	removeIgnoredFields(&hashed)

	files, err := hashed.Serialize()
	if err != nil {
		return "", err
	}

	digest := md5.Sum(files.RedpandaFile) // nolint:gosec // this is not encrypting secure info
	return fmt.Sprintf("%x", digest), nil
}

func removeIgnoredFields(clone *GlobalConfiguration) {
// ignore seeds for hash computation so that changes in this field don't
// trigger cluster restarts
clone.NodeConfiguration.Redpanda.SeedServers = []config.SeedServer{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are other cases where we iterate over the replicas to create urls, such as for panda proxy and schema registry

}

// GetAdditionalRedpandaProperty retrieves a configuration option
func (c *GlobalConfiguration) GetAdditionalRedpandaProperty(
prop string,
Expand Down
21 changes: 21 additions & 0 deletions src/go/k8s/pkg/resources/configuration/configuration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"testing"

"github.com/redpanda-data/redpanda/src/go/k8s/pkg/resources/configuration"
rpkcfg "github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -123,3 +124,23 @@ func TestStringSliceProperties(t *testing.T) {
})
}
}

// TestHash_SeedServersNoHashChange checks that neither the node configuration
// hash nor the whole-configuration hash changes when only the seed server
// list of the configuration is modified.
func TestHash_SeedServersNoHashChange(t *testing.T) {
	cfg := configuration.For("v22.1.1-test")

	// hashes returns the node hash and the whole-configuration hash of the
	// current state of cfg, failing the test on any error.
	hashes := func() (string, string) {
		nodeHash, err := cfg.GetNodeConfigurationHash()
		require.NoError(t, err)
		allHash, err := cfg.GetAllConfigurationHash()
		require.NoError(t, err)
		return nodeHash, allHash
	}

	// Baseline: no seed servers configured.
	cfg.NodeConfiguration.Redpanda.SeedServers = []rpkcfg.SeedServer{}
	nodeBefore, allBefore := hashes()

	// Same configuration with a single seed server added.
	cfg.NodeConfiguration.Redpanda.SeedServers = []rpkcfg.SeedServer{
		{Host: rpkcfg.SocketAddress{Address: "redpanda.com", Port: 9090}},
	}
	nodeAfter, allAfter := hashes()

	// A seed server change must leave the hashes untouched so that it does
	// not require a restart (e.g. when scaling the cluster up or down).
	require.Equal(t, allBefore, allAfter, "all conf")
	require.Equal(t, nodeBefore, nodeAfter, "node conf")
}