diff --git a/changelog/25636.txt b/changelog/25636.txt new file mode 100644 index 000000000000..d5528fb5df33 --- /dev/null +++ b/changelog/25636.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core: make the best effort timeout for encryption count tracking persistence configurable via an environment variable. +``` \ No newline at end of file diff --git a/vault/barrier_aes_gcm.go b/vault/barrier_aes_gcm.go index 6870536bc711..fd5a9493a5e9 100644 --- a/vault/barrier_aes_gcm.go +++ b/vault/barrier_aes_gcm.go @@ -14,11 +14,14 @@ import ( "fmt" "io" "math" + "os" "strconv" "strings" "sync" "time" + "github.com/hashicorp/go-secure-stdlib/parseutil" + "github.com/armon/go-metrics" "github.com/hashicorp/go-secure-stdlib/strutil" "github.com/hashicorp/vault/sdk/helper/jsonutil" @@ -38,7 +41,8 @@ const ( autoRotateCheckInterval = 5 * time.Minute legacyRotateReason = "legacy rotation" // The keyring is persisted before the root key. - keyringTimeout = 1 * time.Second + defaultKeyringTimeout = 1 * time.Second + bestEffortKeyringTimeoutOverride = "VAULT_ENCRYPTION_COUNT_PERSIST_TIMEOUT" ) // Versions of the AESGCM storage methodology @@ -91,6 +95,8 @@ type AESGCMBarrier struct { // Used only for testing RemoteEncryptions *atomic.Int64 totalLocalEncryptions *atomic.Int64 + + bestEffortKeyringTimeout time.Duration } func (b *AESGCMBarrier) RotationConfig() (kc KeyRotationConfig, err error) { @@ -115,6 +121,15 @@ func (b *AESGCMBarrier) SetRotationConfig(ctx context.Context, rotConfig KeyRota // NewAESGCMBarrier is used to construct a new barrier that uses // the provided physical backend for storage. 
func NewAESGCMBarrier(physical physical.Backend) (*AESGCMBarrier, error) { + keyringTimeout := defaultKeyringTimeout + keyringTimeoutStr := os.Getenv(bestEffortKeyringTimeoutOverride) + if keyringTimeoutStr != "" { + t, err := parseutil.ParseDurationSecond(keyringTimeoutStr) + if err != nil { + return nil, fmt.Errorf("failed parsing %s environment variable: %w", bestEffortKeyringTimeoutOverride, err) + } + keyringTimeout = t + } b := &AESGCMBarrier{ backend: physical, sealed: true, @@ -123,6 +138,7 @@ func NewAESGCMBarrier(physical physical.Backend) (*AESGCMBarrier, error) { UnaccountedEncryptions: atomic.NewInt64(0), RemoteEncryptions: atomic.NewInt64(0), totalLocalEncryptions: atomic.NewInt64(0), + bestEffortKeyringTimeout: keyringTimeout, } return b, nil } @@ -256,7 +272,7 @@ func (b *AESGCMBarrier) persistKeyringInternal(ctx context.Context, keyring *Key // We reduce the timeout on the initial 'put' but if this succeeds we will // allow longer later on when we try to persist the root key . var cancelKeyring func() - ctxKeyring, cancelKeyring = context.WithTimeout(ctx, keyringTimeout) + ctxKeyring, cancelKeyring = context.WithTimeout(ctx, b.bestEffortKeyringTimeout) defer cancelKeyring() } diff --git a/website/content/docs/internals/rotation.mdx b/website/content/docs/internals/rotation.mdx index 29c3970764cd..79e653b9fe07 100644 --- a/website/content/docs/internals/rotation.mdx +++ b/website/content/docs/internals/rotation.mdx @@ -73,3 +73,11 @@ Operators can estimate the number of encryptions by summing the following: - The `vault.token.creation` metric where the `token_type` label is `batch`. - The `merkle.flushDirty.num_pages` metric. - The WAL index. + +Vault periodically persists the number of encryptions to support rotation. +This save operation has a 1 second timeout to prevent impact to performance +if Vault is under heavy load. 
Because persisting the encryption count involves the +seal backend (if seal wrap is enabled), some seals (such as HSMs) may regularly +take longer than 1 second to respond. If this is the case, operators +may override that timeout by setting the environment variable +`VAULT_ENCRYPTION_COUNT_PERSIST_TIMEOUT` to a larger value, such as "5s". \ No newline at end of file