Skip to content

Commit

Permalink
operator: add configurable pre-stop and post-start hooks that drain n…
Browse files Browse the repository at this point in the history
…odes
  • Loading branch information
nicolaferraro committed Apr 20, 2022
1 parent 46c56c9 commit e9c17bb
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 0 deletions.
19 changes: 19 additions & 0 deletions src/go/k8s/apis/redpanda/v1alpha1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,15 @@ type ClusterSpec struct {
// DNS name.
// http://www.dns-sd.org/trailingdotsindomainnames.html
DNSTrailingDotDisabled bool `json:"dnsTrailingDotDisabled,omitempty"`
// RestartConfig allows to control the behavior of the cluster when restarting
RestartConfig *RestartConfig `json:"restartConfig,omitempty"`
}

// RestartConfig groups the settings that control how the cluster behaves when
// it restarts, for example because of upgrades or other lifecycle events.
//
// Every field is optional. DisableMaintenanceModeHooks is a pointer so that
// "not set" (nil) can be told apart from an explicit false; see
// Cluster.IsUsingMaintenanceModeHooks for how the value is interpreted.
type RestartConfig struct {
	// DisableMaintenanceModeHooks deactivates the preStop and postStart hooks that force nodes to enter maintenance mode when stopping and exit maintenance mode when up again
	DisableMaintenanceModeHooks *bool `json:"disableMaintenanceModeHooks,omitempty"`
}

// PDBConfig specifies how the PodDisruptionBudget should be created for the
Expand Down Expand Up @@ -793,6 +802,16 @@ func (r *Cluster) IsSchemaRegistryMutualTLSEnabled() bool {
r.Spec.Configuration.SchemaRegistry.TLS.RequireClientAuth
}

// IsUsingMaintenanceModeHooks reports whether the cluster is configured to use
// maintenance mode hooks on the pods.
//
// The hooks are on by default: the result is true when Spec.RestartConfig is
// nil or when its DisableMaintenanceModeHooks flag is unset, and otherwise it
// is the negation of that flag. The maintenance mode feature itself must also
// be enabled for this setting to be relevant.
func (r *Cluster) IsUsingMaintenanceModeHooks() bool {
	restartCfg := r.Spec.RestartConfig
	if restartCfg == nil || restartCfg.DisableMaintenanceModeHooks == nil {
		// Nothing stated explicitly: keep the default.
		return true
	}
	disabled := *restartCfg.DisableMaintenanceModeHooks
	return !disabled
}

// ClusterStatus

// IsRestarting tells if the cluster is restarting due to a change in configuration or an upgrade in progress
Expand Down
25 changes: 25 additions & 0 deletions src/go/k8s/apis/redpanda/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions src/go/k8s/config/crd/bases/redpanda.vectorized.io_clusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,16 @@ spec:
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/'
type: object
type: object
restartConfig:
description: RestartConfig allows to control the behavior of the cluster
when restarting
properties:
disableMaintenanceModeHooks:
description: DisableMaintenanceModeHooks deactivates the preStop
and postStart hooks that force nodes to enter maintenance mode
when stopping and exit maintenance mode when up again
type: boolean
type: object
sidecars:
description: Sidecars is list of sidecars run alongside redpanda container
properties:
Expand Down
77 changes: 77 additions & 0 deletions src/go/k8s/pkg/resources/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,13 @@ func (r *StatefulSetResource) obj(
},
}

if featuregates.MaintenanceMode(r.pandaCluster.Spec.Version) && r.pandaCluster.IsUsingMaintenanceModeHooks() {
ss.Spec.Template.Spec.Containers[0].Lifecycle = &corev1.Lifecycle{
PreStop: r.getPreStopHook(),
PostStart: r.getPostStartHook(),
}
}

if featuregates.CentralizedConfiguration(r.pandaCluster.Spec.Version) {
ss.Spec.Template.Spec.Containers[0].VolumeMounts = append(ss.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
Name: "configmap-dir",
Expand All @@ -535,6 +542,76 @@ func (r *StatefulSetResource) obj(
return ss, nil
}

// getPreStopHook creates a hook that drains the node before shutting down.
//
// The returned handler executes a bash script made of two polling loops joined
// by "&&":
//  1. repeat a PUT on the per-broker maintenance URL until curl reports HTTP
//     status 200;
//  2. repeat a GET on /v1/maintenance until the value extracted for the
//     "finished" key of the response is "true".
//
// Both loops sleep 0.5s between attempts and have no retry limit of their own.
func (r *StatefulSetResource) getPreStopHook() *corev1.Handler {
	// TODO replace scripts with proper RPK calls
	enableCmd := r.composeCURLMaintenanceCommand(`-X PUT --silent -o /dev/null -w "%{http_code}"`, nil)

	statusPath := "/v1/maintenance"
	statusCmd := r.composeCURLMaintenanceCommand(`--silent`, &statusPath)

	steps := make([]string, 0, 2)
	// Step 1: ask the broker to enter maintenance mode.
	steps = append(steps,
		fmt.Sprintf(`until [ "${status:-}" = "200" ]; do status=$(%s); sleep 0.5; done`, enableCmd))
	// Step 2: wait for the drain to be reported as finished.
	steps = append(steps,
		fmt.Sprintf(`until [ "${finished:-}" = "true" ]; do finished=$(%s | grep -o '\"finished\":[^,}]*' | grep -o '[^: ]*$'); sleep 0.5; done`, statusCmd))
	script := strings.Join(steps, " && ")

	action := &corev1.ExecAction{
		Command: []string{"/bin/bash", "-c", script},
	}
	return &corev1.Handler{Exec: action}
}

// getPostStartHook creates a hook that removes maintenance mode after startup.
//
// The returned handler executes a bash loop that repeats a DELETE on the
// per-broker maintenance URL, sleeping 0.5s between attempts, until curl
// reports HTTP status 200 or 400.
func (r *StatefulSetResource) getPostStartHook() *corev1.Handler {
	// TODO replace scripts with proper RPK calls
	disableCmd := r.composeCURLMaintenanceCommand(`-X DELETE --silent -o /dev/null -w "%{http_code}"`, nil)

	// HTTP code 400 is returned by v22 nodes during an upgrade from v21 until
	// the new version reaches quorum and the maintenance mode feature is
	// enabled, so it is accepted as a terminal status as well.
	script := fmt.Sprintf(
		`until [ "${status:-}" = "200" ] || [ "${status:-}" = "400" ]; do status=$(%s); sleep 0.5; done`,
		disableCmd,
	)

	action := &corev1.ExecAction{
		Command: []string{"/bin/bash", "-c", script},
	}
	return &corev1.Handler{Exec: action}
}

// composeCURLMaintenanceCommand builds the curl command line that the
// lifecycle hooks run against the internal admin API.
//
// options is inserted verbatim right after "curl". When the internal admin API
// has TLS enabled the scheme becomes https and certificate flags are added:
// CA, client certificate and key when client auth is required, otherwise only
// --cacert pointing at tls.crt. The host is
// ${POD_NAME}.<cluster name>.<namespace>.svc.cluster.local on the admin API
// port; ${POD_NAME} is left for the shell to expand.
//
// When urlOverwrite is nil the path is
// /v1/brokers/${POD_NAME:<n>}/maintenance with n = len(cluster name)+1, i.e.
// the pod name with its first n characters removed (presumably the
// "<cluster name>-" prefix, leaving the pod ordinal — TODO confirm this always
// matches the broker ID). Otherwise *urlOverwrite is appended as the path.
// nolint:goconst // no need
func (r *StatefulSetResource) composeCURLMaintenanceCommand(
	options string, urlOverwrite *string,
) string {
	adminAPI := r.pandaCluster.AdminAPIInternal()

	scheme := "http"
	certFlags := ""
	if tlsCfg := adminAPI.GetTLS(); tlsCfg != nil && tlsCfg.Enabled {
		scheme = "https"
		certFlags = "--cacert /etc/tls/certs/admin/tls.crt "
		if tlsCfg.RequireClientAuth {
			certFlags = "--cacert /etc/tls/certs/admin/ca/ca.crt --cert /etc/tls/certs/admin/tls.crt --key /etc/tls/certs/admin/tls.key "
		}
	}

	var path string
	if urlOverwrite != nil {
		path = *urlOverwrite
	} else {
		// Skip "<cluster name>" plus one separator character of the pod name.
		path = fmt.Sprintf("/v1/brokers/${POD_NAME:%d}/maintenance", len(r.pandaCluster.Name)+1)
	}

	return fmt.Sprintf(
		"curl %s %s%s://${POD_NAME}.%s.%s.svc.cluster.local:%d%s",
		options,
		certFlags,
		scheme,
		r.pandaCluster.Name,
		r.pandaCluster.Namespace,
		adminAPI.Port,
		path,
	)
}

// setCloudStorage manipulates v1.StatefulSet object in order to add cloud storage specific
// properties to Redpanda pod.
func setCloudStorage(
Expand Down

0 comments on commit e9c17bb

Please sign in to comment.