-
Notifications
You must be signed in to change notification settings - Fork 512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a lifetime manager for Vault authentication tokens #7337
Merged
Merged
Changes from 8 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
f1c1933
Initial renewal implementation
fayzal-g cebefd6
Add metrics and integration test to test token renewal
fayzal-g 8fc330f
remove unneeeded comment
fayzal-g 199d016
Update CHANGELOG.md
fayzal-g 9ecc5db
Use promauto
fayzal-g 721c3c1
address comments
fayzal-g 65413c7
Use global const for Vault image
fayzal-g 0af72b5
Add failure metric
fayzal-g 0e0cf03
Update metrics
fayzal-g 9896451
Update CHANGELOG and mark as experimental
fayzal-g c874f22
include metric names in about-versioning
fayzal-g File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
// SPDX-License-Identifier: AGPL-3.0-only | ||
|
||
package integration | ||
|
||
var ( | ||
VaultImage = "hashicorp/vault:1.13.2" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// SPDX-License-Identifier: AGPL-3.0-only | ||
//go:build requires_docker | ||
|
||
package integration | ||
|
||
import ( | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/grafana/e2e" | ||
e2edb "github.com/grafana/e2e/db" | ||
hashivault "github.com/hashicorp/vault/api" | ||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/grafana/mimir/integration/e2emimir" | ||
) | ||
|
||
func TestVaultTokenRenewal(t *testing.T) { | ||
const devToken = "dev_token" | ||
const httpPort = 8200 | ||
|
||
s, err := e2e.NewScenario(networkName) | ||
require.NoError(t, err) | ||
defer s.Close() | ||
|
||
// Initialize Vault | ||
vault := e2e.NewHTTPService( | ||
"vault", | ||
VaultImage, | ||
nil, | ||
e2e.NewHTTPReadinessProbe(httpPort, "/v1/sys/health", 200, 200), | ||
httpPort, | ||
) | ||
vault.SetEnvVars(map[string]string{"VAULT_DEV_ROOT_TOKEN_ID": devToken}) | ||
require.NoError(t, s.StartAndWaitReady(vault)) | ||
|
||
cli, err := hashivault.NewClient(&hashivault.Config{Address: fmt.Sprintf("http://%s", vault.HTTPEndpoint())}) | ||
require.NoError(t, err) | ||
|
||
cli.SetToken(devToken) | ||
|
||
err = cli.Sys().EnableAuthWithOptions("userpass", &hashivault.EnableAuthOptions{ | ||
Type: "userpass", | ||
}) | ||
require.NoError(t, err) | ||
|
||
_, err = cli.Logical().Write("auth/userpass/users/foo", map[string]interface{}{ | ||
"password": "bar", | ||
"ttl": "5s", | ||
"max_ttl": "10s", | ||
}) | ||
require.NoError(t, err) | ||
|
||
consul := e2edb.NewConsul() | ||
minio := e2edb.NewMinio(9000, blocksBucketName) | ||
require.NoError(t, s.StartAndWaitReady(consul, minio)) | ||
|
||
flags := mergeFlags( | ||
BlocksStorageFlags(), | ||
BlocksStorageS3Flags(), | ||
map[string]string{ | ||
"-vault.enabled": "true", | ||
"-vault.url": fmt.Sprintf("http://%s", vault.NetworkHTTPEndpoint()), | ||
"-vault.mount-path": "secret", | ||
"-vault.auth.type": "userpass", | ||
"-vault.auth.userpass.username": "foo", | ||
"-vault.auth.userpass.password": "bar", | ||
"-log.level": "debug", | ||
}, | ||
) | ||
|
||
// Start Mimir | ||
mimir := e2emimir.NewSingleBinary("mimir-1", e2e.MergeFlags(DefaultSingleBinaryFlags(), flags)) | ||
require.NoError(t, s.StartAndWaitReady(mimir)) | ||
|
||
// Check that the token lease has been updated before hitting max_ttl | ||
require.NoError(t, mimir.WaitSumMetrics(e2e.GreaterOrEqual(2), "cortex_vault_token_lease_renewal_total")) | ||
// Check that re-authentication occurred | ||
require.NoError(t, mimir.WaitSumMetrics(e2e.Equals(2), "cortex_vault_auth_total")) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -7,11 +7,16 @@ import ( | |||||
"errors" | ||||||
"flag" | ||||||
"fmt" | ||||||
"time" | ||||||
|
||||||
"github.com/go-kit/log" | ||||||
"github.com/go-kit/log/level" | ||||||
hashivault "github.com/hashicorp/vault/api" | ||||||
"github.com/hashicorp/vault/api/auth/approle" | ||||||
"github.com/hashicorp/vault/api/auth/kubernetes" | ||||||
"github.com/hashicorp/vault/api/auth/userpass" | ||||||
"github.com/prometheus/client_golang/prometheus" | ||||||
"github.com/prometheus/client_golang/prometheus/promauto" | ||||||
) | ||||||
|
||||||
// Config for the Vault used to fetch secrets | ||||||
|
@@ -57,13 +62,22 @@ type SecretsEngine interface { | |||||
} | ||||||
|
||||||
type Vault struct { | ||||||
KVStore SecretsEngine | ||||||
kvStore SecretsEngine | ||||||
auth AuthConfig | ||||||
|
||||||
client *hashivault.Client | ||||||
token *hashivault.Secret | ||||||
logger log.Logger | ||||||
|
||||||
authTotal prometheus.Counter | ||||||
authLeaseRenewalSuccessTotal prometheus.Counter | ||||||
authLeaseRenewalFailureTotal prometheus.Counter | ||||||
} | ||||||
|
||||||
func NewVault(cfg Config) (*Vault, error) { | ||||||
func NewVault(cfg Config, l log.Logger, registerer prometheus.Registerer) (*Vault, error) { | ||||||
if cfg.Mock != nil { | ||||||
return &Vault{ | ||||||
KVStore: cfg.Mock, | ||||||
kvStore: cfg.Mock, | ||||||
}, nil | ||||||
} | ||||||
|
||||||
|
@@ -75,26 +89,36 @@ func NewVault(cfg Config) (*Vault, error) { | |||||
return nil, err | ||||||
} | ||||||
|
||||||
authMethod, err := cfg.Auth.authMethod() | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
||||||
authFac := authFactoryReal{} | ||||||
_, err = authMethod.authenticate(context.Background(), &authFac, client) | ||||||
authToken, err := getAuthToken(context.Background(), &cfg.Auth, client) | ||||||
if err != nil { | ||||||
return nil, fmt.Errorf("error authenticating to vault: %w", err) | ||||||
return nil, fmt.Errorf("failed to get auth token from vault: %v", err) | ||||||
} | ||||||
|
||||||
vault := &Vault{ | ||||||
KVStore: client.KVv2(cfg.MountPath), | ||||||
kvStore: client.KVv2(cfg.MountPath), | ||||||
auth: cfg.Auth, | ||||||
token: authToken, | ||||||
client: client, | ||||||
logger: l, | ||||||
authTotal: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ | ||||||
Name: "cortex_vault_auth_total", | ||||||
Help: "Total number of times authentication to Vault happened during token lifecycle management", | ||||||
}), | ||||||
authLeaseRenewalSuccessTotal: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ | ||||||
Name: "cortex_vault_token_lease_renewal_total", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
Help: "Total number of times the auth token was renewed successfully", | ||||||
}), | ||||||
authLeaseRenewalFailureTotal: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ | ||||||
Name: "cortex_vault_token_lease_renewal_failure_total", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: |
||||||
Help: "Total number of times auth token lease renewal failed", | ||||||
}), | ||||||
} | ||||||
|
||||||
return vault, nil | ||||||
} | ||||||
|
||||||
func (v *Vault) ReadSecret(path string) ([]byte, error) { | ||||||
secret, err := v.KVStore.Get(context.Background(), path) | ||||||
secret, err := v.kvStore.Get(context.Background(), path) | ||||||
|
||||||
if err != nil { | ||||||
return nil, fmt.Errorf("unable to read secret from vault: %v", err) | ||||||
|
@@ -106,14 +130,76 @@ func (v *Vault) ReadSecret(path string) ([]byte, error) { | |||||
|
||||||
data, ok := secret.Data["value"].(string) | ||||||
if !ok { | ||||||
return nil, fmt.Errorf("secret data type is not string, found %T value: %#v", secret.Data["value"], secret.Data["value"]) | ||||||
return nil, fmt.Errorf("secret data type is not string, found %T value: %#v at path: %s", secret.Data["value"], secret.Data["value"], path) | ||||||
} | ||||||
|
||||||
return []byte(data), nil | ||||||
} | ||||||
|
||||||
func (v *Vault) manageTokenLifecycle(ctx context.Context, authTokenWatcher *hashivault.LifetimeWatcher) { | ||||||
go authTokenWatcher.Start() | ||||||
defer authTokenWatcher.Stop() | ||||||
|
||||||
for { | ||||||
select { | ||||||
case <-ctx.Done(): | ||||||
return | ||||||
|
||||||
case <-authTokenWatcher.DoneCh(): | ||||||
// Token failed to renew (e.g expired), re-auth required | ||||||
return | ||||||
|
||||||
case renewalInfo := <-authTokenWatcher.RenewCh(): | ||||||
// Token was successfully renewed | ||||||
if renewalInfo.Secret.Auth != nil { | ||||||
level.Debug(v.logger).Log("msg", "token renewed", "leaseDuration", time.Duration(renewalInfo.Secret.Auth.LeaseDuration)*time.Second) | ||||||
v.authLeaseRenewalSuccessTotal.Inc() | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
func (v *Vault) KeepRenewingTokenLease(ctx context.Context) error { | ||||||
for ctx.Err() == nil { | ||||||
authTokenWatcher, err := v.client.NewLifetimeWatcher(&hashivault.LifetimeWatcherInput{ | ||||||
Secret: v.token, | ||||||
}) | ||||||
if err != nil { | ||||||
v.authLeaseRenewalFailureTotal.Inc() | ||||||
return fmt.Errorf("error initializing auth token lifetime watcher: %v", err) | ||||||
} | ||||||
|
||||||
v.manageTokenLifecycle(ctx, authTokenWatcher) | ||||||
|
||||||
if ctx.Err() != nil { | ||||||
return ctx.Err() | ||||||
} | ||||||
|
||||||
newAuthToken, err := getAuthToken(ctx, &v.auth, v.client) | ||||||
if err != nil { | ||||||
level.Error(v.logger).Log("msg", "error during re-authentication after token expiry", "err", err) | ||||||
v.authLeaseRenewalFailureTotal.Inc() | ||||||
return err | ||||||
} | ||||||
|
||||||
v.authTotal.Inc() | ||||||
v.token = newAuthToken | ||||||
} | ||||||
|
||||||
return nil | ||||||
} | ||||||
|
||||||
type authFactoryReal struct{} | ||||||
|
||||||
func getAuthToken(ctx context.Context, authCfg *AuthConfig, client *hashivault.Client) (*hashivault.Secret, error) { | ||||||
am, err := authCfg.authMethod() | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
||||||
return am.authenticate(ctx, &authFactoryReal{}, client) | ||||||
} | ||||||
|
||||||
func (af *authFactoryReal) NewAppRoleAuth(roleID string, secretID *approle.SecretID, opts ...approle.LoginOption) (*approle.AppRoleAuth, error) { | ||||||
return approle.NewAppRoleAuth( | ||||||
roleID, | ||||||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure this description is correct. It's only how many times we started renewal process, but we don't know how many authentication attempts there were.
From user's perspective, how is this metric different from
authLeaseRenewalSuccessTotal
?I wonder if we should have a single "gauge" metric that's set to 1 as long as renewal is running, and we drop
cortex_vault_auth_total
andcortex_vault_token_lease_renewal_failure_total
.metrics. (However I'd keep
cortex_vault_token_lease_renewal_success_total
)WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So
cortex_vault_auth_total
is only incremented upon re-authentication after the call togetAuthToken
-authLeaseRenewalSuccessTotal
gets incremented withinmanageTokenLifecycle
(there is no return statement in the case ofRenewCh
being received, which is why they differ)The thought process behind this was to have a metric to track token renewal, and a metric to track re-auth.