diff --git a/changelog/10964.txt b/changelog/10964.txt new file mode 100644 index 000000000000..04874b815206 --- /dev/null +++ b/changelog/10964.txt @@ -0,0 +1,5 @@ +```release-note:changes +agent: Failed auto-auth attempts are now throttled by an exponential backoff instead of the +~2 second retry delay. The maximum backoff may be configured with the new `max_backoff` parameter, +which defaults to 5 minutes. +``` diff --git a/command/agent.go b/command/agent.go index 66ad04430ed6..cf413004ab7c 100644 --- a/command/agent.go +++ b/command/agent.go @@ -575,6 +575,7 @@ func (c *AgentCommand) Run(args []string) int { Logger: c.logger.Named("auth.handler"), Client: c.client, WrapTTL: config.AutoAuth.Method.WrapTTL, + MaxBackoff: config.AutoAuth.Method.MaxBackoff, EnableReauthOnNewCredentials: config.AutoAuth.EnableReauthOnNewCredentials, EnableTemplateTokenCh: enableTokenCh, }) diff --git a/command/agent/auth/auth.go b/command/agent/auth/auth.go index e8aa9cfdfc37..4132b0dcd052 100644 --- a/command/agent/auth/auth.go +++ b/command/agent/auth/auth.go @@ -8,11 +8,16 @@ import ( "net/http" "time" - hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/api" "github.com/hashicorp/vault/sdk/helper/jsonutil" ) +const ( + initialBackoff = 1 * time.Second + defaultMaxBackoff = 5 * time.Minute +) + // AuthMethod is the interface that auto-auth methods implement for the agent // to use. 
type AuthMethod interface { @@ -48,6 +53,7 @@ type AuthHandler struct { client *api.Client random *rand.Rand wrapTTL time.Duration + maxBackoff time.Duration enableReauthOnNewCredentials bool enableTemplateTokenCh bool } @@ -56,6 +62,7 @@ type AuthHandlerConfig struct { Logger hclog.Logger Client *api.Client WrapTTL time.Duration + MaxBackoff time.Duration Token string EnableReauthOnNewCredentials bool EnableTemplateTokenCh bool @@ -72,6 +79,7 @@ func NewAuthHandler(conf *AuthHandlerConfig) *AuthHandler { client: conf.Client, random: rand.New(rand.NewSource(int64(time.Now().Nanosecond()))), wrapTTL: conf.WrapTTL, + maxBackoff: conf.MaxBackoff, enableReauthOnNewCredentials: conf.EnableReauthOnNewCredentials, enableTemplateTokenCh: conf.EnableTemplateTokenCh, } @@ -91,6 +99,13 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error { return errors.New("auth handler: nil auth method") } + backoff := initialBackoff + maxBackoff := defaultMaxBackoff + + if ah.maxBackoff > 0 { + maxBackoff = ah.maxBackoff + } + ah.logger.Info("starting auth handler") defer func() { am.Shutdown() @@ -130,8 +145,7 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error { default: } - // Create a fresh backoff value - backoff := 2*time.Second + time.Duration(ah.random.Int63()%int64(time.Second*2)-int64(time.Second)) + backoff = calculateBackoff(backoff, maxBackoff) var clientToUse *api.Client var err error @@ -311,3 +325,23 @@ func (ah *AuthHandler) Run(ctx context.Context, am AuthMethod) error { } } } + +// calculateBackoff determines a new backoff duration that is roughly twice +// the previous value, capped to a max value, with a measure of randomness. +// +// Up to 25% of the capped value is trimmed at random. Values too small to +// have a non-zero quarter are returned untrimmed, because rand.Int63n panics +// when its argument is not positive. 
+func calculateBackoff(previous, max time.Duration) time.Duration { + maxBackoff := 2 * previous + if maxBackoff > max { + maxBackoff = max + } + + // Trim a random amount (0-25%) off the doubled duration + var trim time.Duration + if quarter := int64(maxBackoff) / 4; quarter > 0 { + trim = time.Duration(rand.Int63n(quarter)) + } + return maxBackoff - trim +} diff --git a/command/agent/auth/auth_test.go b/command/agent/auth/auth_test.go index 8b6ae7003037..ab1718ec9695 100644 --- a/command/agent/auth/auth_test.go +++ b/command/agent/auth/auth_test.go @@ -6,7 +6,7 @@ import ( "testing" "time" - hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault/api" "github.com/hashicorp/vault/builtin/credential/userpass" vaulthttp "github.com/hashicorp/vault/http" @@ -106,3 +106,42 @@ consumption: } } } + +func TestCalculateBackoff(t *testing.T) { + tests := []struct { + previous time.Duration + max time.Duration + expMin time.Duration + expMax time.Duration + }{ + { + 1000 * time.Millisecond, + 60000 * time.Millisecond, + 1500 * time.Millisecond, + 2000 * time.Millisecond, + }, + { + 1000 * time.Millisecond, + 5000 * time.Millisecond, + 1500 * time.Millisecond, + 2000 * time.Millisecond, + }, + { + 4000 * time.Millisecond, + 5000 * time.Millisecond, + 3750 * time.Millisecond, + 5000 * time.Millisecond, + }, + } + + for _, test := range tests { + for i := 0; i < 100; i++ { + backoff := calculateBackoff(test.previous, test.max) + + // Verify that the new backoff is 75-100% of 2*previous, but <= than the max + if backoff < test.expMin || backoff > test.expMax { + t.Fatalf("expected backoff in range %v to %v, got: %v", test.expMin, test.expMax, backoff) + } + } + } +} diff --git a/command/agent/config/config.go b/command/agent/config/config.go index a8d233b1e5a9..8e3f3f7deb7c 100644 --- a/command/agent/config/config.go +++ b/command/agent/config/config.go @@ -62,12 +62,14 @@ type AutoAuth struct { // Method represents the configuration for the authentication backend type Method struct { - Type string - 
MountPath string `hcl:"mount_path"` - WrapTTLRaw interface{} `hcl:"wrap_ttl"` - WrapTTL time.Duration `hcl:"-"` - Namespace string `hcl:"namespace"` - Config map[string]interface{} + Type string + MountPath string `hcl:"mount_path"` + WrapTTLRaw interface{} `hcl:"wrap_ttl"` + WrapTTL time.Duration `hcl:"-"` + MaxBackoffRaw interface{} `hcl:"max_backoff"` + MaxBackoff time.Duration `hcl:"-"` + Namespace string `hcl:"namespace"` + Config map[string]interface{} } // Sink defines a location to write the authenticated token @@ -358,6 +360,14 @@ func parseAutoAuth(result *Config, list *ast.ObjectList) error { } } + if result.AutoAuth.Method.MaxBackoffRaw != nil { + var err error + if result.AutoAuth.Method.MaxBackoff, err = parseutil.ParseDurationSecond(result.AutoAuth.Method.MaxBackoffRaw); err != nil { + return err + } + result.AutoAuth.Method.MaxBackoffRaw = nil + } + return nil } diff --git a/command/agent/config/config_test.go b/command/agent/config/config_test.go index 6bd3b2d85b79..960c374fb157 100644 --- a/command/agent/config/config_test.go +++ b/command/agent/config/config_test.go @@ -126,6 +126,7 @@ func TestLoadConfigFile(t *testing.T) { Config: map[string]interface{}{ "role": "foobar", }, + MaxBackoff: 0, }, Sinks: []*Sink{ { @@ -178,9 +179,10 @@ func TestLoadConfigFile_Method_Wrapping(t *testing.T) { }, AutoAuth: &AutoAuth{ Method: &Method{ - Type: "aws", - MountPath: "auth/aws", - WrapTTL: 5 * time.Minute, + Type: "aws", + MountPath: "auth/aws", + WrapTTL: 5 * time.Minute, + MaxBackoff: 2 * time.Minute, Config: map[string]interface{}{ "role": "foobar", }, diff --git a/command/agent/config/test-fixtures/config-method-wrapping.hcl b/command/agent/config/test-fixtures/config-method-wrapping.hcl index 2a5e34145475..cbafc5a24593 100644 --- a/command/agent/config/test-fixtures/config-method-wrapping.hcl +++ b/command/agent/config/test-fixtures/config-method-wrapping.hcl @@ -7,6 +7,7 @@ auto_auth { config = { role = "foobar" } + max_backoff = "2m" } sink { diff 
--git a/website/content/docs/agent/autoauth/index.mdx b/website/content/docs/agent/autoauth/index.mdx index fb8d9a894bc1..f2b48a1dbf3f 100644 --- a/website/content/docs/agent/autoauth/index.mdx +++ b/website/content/docs/agent/autoauth/index.mdx @@ -20,9 +20,8 @@ are locations where the agent should write a token any time the current token value has changed. When the agent is started with Auto-Auth enabled, it will attempt to acquire a -Vault token using the configured Method. On failure, it will back off for a -short while (including some randomness to help prevent thundering herd -scenarios) and retry. On success, unless the auth method is configured to wrap +Vault token using the configured Method. On failure, it will exponentially back +off and then retry. On success, unless the auth method is configured to wrap the tokens, it will keep the resulting token renewed until renewal is no longer allowed or fails, at which point it will attempt to reauthenticate. @@ -128,6 +127,10 @@ These are common configuration values that live within the `method` block: structure. Values can be an integer number of seconds or a stringish value like `5m`. +- `max_backoff` `(string or integer: "5m")` - The maximum time Agent will delay + before retrying after a failed auth attempt. The backoff will start at 1 second + and double (with some randomness) after successive failures, capped by `max_backoff`. + - `config` `(object: required)` - Configuration of the method itself. See the sidebar for information about each method.