Skip to content

Commit

Permalink
Merge pull request #2763 from randomvariable/boootstrappy
Browse files Browse the repository at this point in the history
✨ Add `UseExperimentalRetryJoin` to KubeadmConfig
  • Loading branch information
k8s-ci-robot committed Mar 25, 2020
2 parents f003f6e + dcd3d51 commit 4672438
Show file tree
Hide file tree
Showing 11 changed files with 491 additions and 9 deletions.
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ CONVERSION_GEN := $(abspath $(TOOLS_BIN_DIR)/conversion-gen)
# Bindata.
GOBINDATA := $(abspath $(TOOLS_BIN_DIR)/go-bindata)
GOBINDATA_CLUSTERCTL_DIR := cmd/clusterctl/config
CLOUDINIT_PKG_DIR := bootstrap/kubeadm/internal/cloudinit
CLOUDINIT_GENERATED := $(CLOUDINIT_PKG_DIR)/zz_generated.bindata.go
CLOUDINIT_SCRIPT := $(CLOUDINIT_PKG_DIR)/kubeadm-bootstrap-script.sh
CERTMANAGER_COMPONENTS_GENERATED_FILE := cert-manager.yaml

# Define Docker related variables. Releases should modify and double check these vars.
Expand Down Expand Up @@ -242,7 +245,7 @@ generate-go-kubeadm-control-plane: $(CONTROLLER_GEN) $(CONVERSION_GEN) ## Runs G
paths=./controlplane/kubeadm/api/...

.PHONY: generate-bindata
generate-bindata: $(KUSTOMIZE) $(GOBINDATA) clean-bindata ## Generate code for embedding the clusterctl api manifest
generate-bindata: $(KUSTOMIZE) $(GOBINDATA) clean-bindata $(CLOUDINIT_GENERATED) ## Generate code for embedding the clusterctl api manifest
# Package manifest YAML into a single file.
mkdir -p $(GOBINDATA_CLUSTERCTL_DIR)/manifest/
$(KUSTOMIZE) build $(GOBINDATA_CLUSTERCTL_DIR)/crd > $(GOBINDATA_CLUSTERCTL_DIR)/manifest/clusterctl-api.yaml
Expand All @@ -255,6 +258,11 @@ generate-bindata: $(KUSTOMIZE) $(GOBINDATA) clean-bindata ## Generate code for e
# Cleanup the manifest folder.
$(MAKE) clean-bindata

# Embed the kubeadm bootstrap script ($(CLOUDINIT_SCRIPT)) into the cloudinit
# package as Go source via go-bindata, prefixed with the boilerplate header
# from ./hack/boilerplate.
#
# The file is assembled under temporary names and only moved onto the real
# target as the last step, so a failed or interrupted run never leaves a
# truncated target that looks newer than its prerequisites.
$(CLOUDINIT_GENERATED): $(GOBINDATA) $(CLOUDINIT_SCRIPT)
	$(GOBINDATA) -mode=420 -modtime=1 -pkg=cloudinit -o=$@.bindata.tmp $(CLOUDINIT_SCRIPT)
	cat ./hack/boilerplate/boilerplate.generatego.txt $@.bindata.tmp > $@.tmp
	rm -f $@.bindata.tmp
	mv -f $@.tmp $@

.PHONY: generate-manifests
generate-manifests: ## Generate manifests e.g. CRD, RBAC etc.
$(MAKE) generate-core-manifests
Expand Down
2 changes: 1 addition & 1 deletion bootstrap/kubeadm/api/v1alpha2/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func (src *KubeadmConfig) ConvertTo(dstRaw conversion.Hub) error {

dst.Status.DataSecretName = restored.Status.DataSecretName
dst.Spec.Verbosity = restored.Spec.Verbosity
dst.Spec.UseExperimentalRetryJoin = restored.Spec.UseExperimentalRetryJoin

return nil
}
Expand Down Expand Up @@ -119,7 +120,6 @@ func Convert_v1alpha3_KubeadmConfigStatus_To_v1alpha2_KubeadmConfigStatus(in *ku
return nil
}


// Convert_v1alpha2_KubeadmConfigSpec_To_v1alpha3_KubeadmConfigSpec converts this KubeadmConfigSpec to the Hub version (v1alpha3).
func Convert_v1alpha2_KubeadmConfigSpec_To_v1alpha3_KubeadmConfigSpec(in *KubeadmConfigSpec, out *kubeadmbootstrapv1alpha3.KubeadmConfigSpec, s apiconversion.Scope) error {
return autoConvert_v1alpha2_KubeadmConfigSpec_To_v1alpha3_KubeadmConfigSpec(in, out, s)
Expand Down
1 change: 1 addition & 0 deletions bootstrap/kubeadm/api/v1alpha2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions bootstrap/kubeadm/api/v1alpha3/kubeadmbootstrapconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ type KubeadmConfigSpec struct {
// It overrides the `--v` flag in kubeadm commands.
// +optional
Verbosity *int32 `json:"verbosity,omitempty"`

// UseExperimentalRetryJoin replaces a basic kubeadm command with a shell
// script with retries for joins.
//
// This is meant to be an experimental temporary workaround on some environments
// where joins fail due to timing (and other issues). The long term goal is to add retries to
// kubeadm proper and use that functionality.
//
// This will add about 40KB to userdata
//
// For more information, refer to https://github.com/kubernetes-sigs/cluster-api/pull/2763#discussion_r397306055.
// +optional
UseExperimentalRetryJoin bool `json:"useExperimentalRetryJoin,omitempty"`
}

// KubeadmConfigStatus defines the observed state of KubeadmConfig
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1554,6 +1554,15 @@ spec:
items:
type: string
type: array
useExperimentalRetryJoin:
description: "UseExperimentalRetryJoin replaces a basic kubeadm command
with a shell script with retries for joins. \n This is meant to
be an experimental temporary workaround on some environments where
joins fail due to timing (and other issues). The long term goal
is to add retries to kubeadm proper and use that functionality.
\n This will add about 40KB to userdata \n For more information,
refer to https://github.com/kubernetes-sigs/cluster-api/pull/2763#discussion_r397306055."
type: boolean
users:
description: Users specifies extra users to add
items:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,15 @@ spec:
items:
type: string
type: array
useExperimentalRetryJoin:
description: "UseExperimentalRetryJoin replaces a basic kubeadm
command with a shell script with retries for joins. \n This
is meant to be an experimental temporary workaround on some
environments where joins fail due to timing (and other issues).
The long term goal is to add retries to kubeadm proper and
use that functionality. \n This will add about 40KB to userdata
\n For more information, refer to https://github.com/kubernetes-sigs/cluster-api/pull/2763#discussion_r397306055."
type: boolean
users:
description: Users specifies extra users to add
items:
Expand Down
5 changes: 3 additions & 2 deletions bootstrap/kubeadm/controllers/kubeadmconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,9 @@ func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *S
}

cloudJoinData, err := cloudinit.NewJoinControlPlane(&cloudinit.ControlPlaneJoinInput{
JoinConfiguration: joinData,
Certificates: certificates,
JoinConfiguration: joinData,
Certificates: certificates,
UseExperimentalRetry: scope.Config.Spec.UseExperimentalRetryJoin,
BaseUserData: cloudinit.BaseUserData{
AdditionalFiles: scope.Config.Spec.Files,
NTP: scope.Config.Spec.NTP,
Expand Down
45 changes: 40 additions & 5 deletions bootstrap/kubeadm/internal/cloudinit/controlplane_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,19 @@ limitations under the License.
package cloudinit

import (
"github.com/pkg/errors"
"fmt"

"github.com/pkg/errors"
bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1alpha3"
"sigs.k8s.io/cluster-api/util/secret"
)

const (
standardJoinCommand = "kubeadm join --config /tmp/kubeadm-controlplane-join-config.yaml %s"
retriableJoinScriptName = "/usr/local/bin/kubeadm-bootstrap-script"
retriableJoinScriptOwner = "root"
retriableJoinScriptPermissions = "0755"

controlPlaneJoinCloudInit = `{{.Header}}
{{template "files" .WriteFiles}}
- path: /tmp/kubeadm-controlplane-join-config.yaml
Expand All @@ -32,7 +39,7 @@ const (
{{.JoinConfiguration | Indent 6}}
runcmd:
{{- template "commands" .PreKubeadmCommands }}
- 'kubeadm join --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}'
- {{ .KubeadmCommand }}
{{- template "commands" .PostKubeadmCommands }}
{{- template "ntp" .NTP }}
{{- template "users" .Users }}
Expand All @@ -43,9 +50,10 @@ runcmd:
type ControlPlaneJoinInput struct {
BaseUserData
secret.Certificates

BootstrapToken string
JoinConfiguration string
UseExperimentalRetry bool
KubeadmCommand string
BootstrapToken string
JoinConfiguration string
}

// NewJoinControlPlane returns the user data string to be used on a new control plane instance.
Expand All @@ -54,10 +62,37 @@ func NewJoinControlPlane(input *ControlPlaneJoinInput) ([]byte, error) {
// TODO: Consider validating that the correct certificates exist. It is different for external/stacked etcd
input.WriteFiles = input.Certificates.AsFiles()
input.WriteFiles = append(input.WriteFiles, input.AdditionalFiles...)
input.KubeadmCommand = fmt.Sprintf(standardJoinCommand, input.KubeadmVerbosity)
if input.UseExperimentalRetry {
err := input.useBootstrapScript()
if err != nil {
return nil, err
}
}
userData, err := generate("JoinControlplane", controlPlaneJoinCloudInit, input)
if err != nil {
return nil, errors.Wrapf(err, "failed to generate user data for machine joining control plane")
}

return userData, err
}

// useBootstrapScript switches the input over to the embedded bootstrap script:
// it renders the script as a template against input, appends the result to
// input.WriteFiles at retriableJoinScriptName, and sets input.KubeadmCommand
// to that same path.
//
// It returns an error if the embedded script cannot be read or rendered; in
// that case input is left unmodified.
func (input *ControlPlaneJoinInput) useBootstrapScript() error {
	rawScript, err := bootstrapKubeadmInternalCloudinitKubeadmBootstrapScriptShBytes()
	if err != nil {
		return errors.Wrap(err, "couldn't read bootstrap script")
	}

	// The script is itself a template, so it is rendered with the same input
	// that feeds the cloud-init document.
	rendered, err := generate("JoinControlplaneScript", string(rawScript), input)
	if err != nil {
		return errors.Wrap(err, "failed to generate user data for machine joining control plane")
	}

	input.WriteFiles = append(input.WriteFiles, bootstrapv1.File{
		Path:        retriableJoinScriptName,
		Owner:       retriableJoinScriptOwner,
		Permissions: retriableJoinScriptPermissions,
		Content:     string(rendered),
	})
	input.KubeadmCommand = retriableJoinScriptName

	return nil
}
129 changes: 129 additions & 0 deletions bootstrap/kubeadm/internal/cloudinit/kubeadm-bootstrap-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/bin/bash
# Copyright 2020 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Log an error, run the kubeadm reset steps and exit.
# Args:
#   $1 Message to log with the error
#   $2 The error code to return (optional, defaults to 1)
log::error_exit() {
  local message="${1}"
  # Callers may omit $2. Without a default the script would run `exit ""`,
  # which bash rejects ("numeric argument required"), leaving the final status
  # up to the shell and the log line below without a status value.
  local code="${2:-1}"

  log::error "${message}"
  # Each reset step is best-effort (`|| true`) so that one failing step does
  # not stop the remaining steps or the final exit.
  log::info "Removing member from cluster status"
  kubeadm reset -f update-cluster-status || true
  log::info "Removing etcd member"
  kubeadm reset -f remove-etcd-member || true
  log::info "Resetting kubeadm"
  kubeadm reset -f || true
  log::error "cluster.x-k8s.io kubeadm bootstrap script $0 exiting with status ${code}"
  exit "${code}"
}

# Log that the bootstrap script finished and exit with status 0.
log::success_exit() {
  local -r script_name="$0"
  log::info "cluster.x-k8s.io kubeadm bootstrap script ${script_name} finished"
  exit 0
}

# Log an error but keep going.
# Args:
#   $1    Headline, printed after a "!!!" marker and an ISO-8601 timestamp
#   $2... Optional extra lines, printed one per line beneath the headline
# Everything is written to stderr.
log::error() {
  # Keep both helpers local so that logging never overwrites a caller's
  # variables of the same name.
  local timestamp message
  timestamp=$(date --iso-8601=seconds)
  echo "!!! [${timestamp}] ${1}" >&2
  shift
  for message; do
    echo " ${message}" >&2
  done
}

# Print a status line. Formatted to show up in a stream of output.
# Args:
#   $1    Headline, printed after a "+++" marker and an ISO-8601 timestamp
#   $2... Optional extra lines, printed one per line beneath the headline
# Everything is written to stdout.
log::info() {
  # Keep both helpers local so that logging never overwrites a caller's
  # variables of the same name.
  local timestamp message
  timestamp=$(date --iso-8601=seconds)
  echo "+++ [${timestamp}] ${1}"
  shift
  for message; do
    echo " ${message}"
  done
}

# Turn the exit status of a kubeadm invocation into a log line.
# Args:
#   $1 Description of the kubeadm command that was run
#   $2 Exit status that command returned
# Status 3 (validation error) is treated as fatal and ends the script through
# log::error_exit; every other status is only logged and control returns to
# the caller, which decides what to do next.
check_kubeadm_command() {
  local command="${1}"
  local code="${2}"
  case "${code}" in
    "0")
      log::info "kubeadm reported successful execution for ${command}"
      ;;
    "1")
      log::error "kubeadm reported failed action(s) for ${command}"
      ;;
    "2")
      log::error "kubeadm reported preflight check error during ${command}"
      ;;
    "3")
      # Hand the status on explicitly so the script exits with kubeadm's own
      # code instead of relying on log::error_exit being given no code at all.
      log::error_exit "kubeadm reported validation error for ${command}" "${code}"
      ;;
    *)
      log::error "kubeadm reported unknown error ${code} for ${command}"
      ;;
  esac
}

# Run a kubeadm command, retrying when it fails.
# Args:
#   $@ The command to run; the join config path and the templated verbosity
#      argument are appended to it
# Makes at most 5 attempts with a 15 second pause after each failed one.
# Retrying stops early on status 0 or status 2. If the last status is
# non-zero the script is ended through log::error_exit with that status.
function retry-command() {
  # Local so the counter cannot clash with a variable of the same name
  # elsewhere in the script.
  local attempt=0
  local kubeadm_return
  until [ "${attempt}" -ge 5 ]; do
    log::info "running '$*'"
    # shellcheck disable=SC1083
    "$@" --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}
    kubeadm_return=$?
    check_kubeadm_command "'$*'" "${kubeadm_return}"
    if [ "${kubeadm_return}" -eq 0 ]; then
      break
    fi
    # Preflight errors (status 2) are not retried.
    # NOTE(review): the original comment here said preflight errors are allowed
    # to pass, but status 2 is non-zero and so still ends the script in the
    # check after the loop - confirm which behaviour is intended.
    if [ "${kubeadm_return}" -eq 2 ]; then
      break
    fi
    attempt=$((attempt + 1))
    sleep 15
  done
  if [ "${kubeadm_return}" -ne 0 ]; then
    # Pass the failing status on so the script exits with a well-defined code.
    log::error_exit "too many errors, exiting" "${kubeadm_return}"
  fi
}

# Run a kubeadm command exactly once and end the script if it fails.
# Args:
#   $@ The command to run; the join config path and the templated verbosity
#      argument are appended to it
# Any non-zero status ends the script through log::error_exit with that status.
function try-or-die-command() {
  local kubeadm_return
  log::info "running '$*'"
  # shellcheck disable=SC1083
  "$@" --config /tmp/kubeadm-controlplane-join-config.yaml {{.KubeadmVerbosity}}
  kubeadm_return=$?
  check_kubeadm_command "'$*'" "${kubeadm_return}"
  if [ "${kubeadm_return}" -ne 0 ]; then
    # Pass the failing status on so the script exits with a well-defined code.
    log::error_exit "fatal error, exiting" "${kubeadm_return}"
  fi
}

# Run the control-plane join one kubeadm phase at a time, in this order.
# Every phase goes through retry-command except the etcd join, which is
# attempted exactly once via try-or-die-command.
# NOTE(review): presumably the etcd join is not retried because repeating it
# is unsafe - confirm against the kubeadm documentation.
retry-command kubeadm join phase preflight
retry-command kubeadm join phase control-plane-prepare download-certs
retry-command kubeadm join phase control-plane-prepare certs
retry-command kubeadm join phase control-plane-prepare kubeconfig
retry-command kubeadm join phase control-plane-prepare control-plane
retry-command kubeadm join phase kubelet-start
try-or-die-command kubeadm join phase control-plane-join etcd
retry-command kubeadm join phase control-plane-join update-status
retry-command kubeadm join phase control-plane-join mark-control-plane

# Only reached when none of the helpers above ended the script first.
log::success_exit
Loading

0 comments on commit 4672438

Please sign in to comment.