Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DR Metric scraping capability to debug command #15316

Merged
merged 8 commits into from
May 6, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions builtin/logical/transit/path_random.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package transit

import (
"context"

"github.com/hashicorp/vault/helper/random"
"github.com/hashicorp/vault/sdk/framework"
"github.com/hashicorp/vault/sdk/logical"
Expand Down
3 changes: 3 additions & 0 deletions changelog/15316.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
cli: vault debug can now collect metrics from a DR Secondary cluster if unauthenticated metrics access is enabled
davidadeleon marked this conversation as resolved.
Show resolved Hide resolved
```
35 changes: 19 additions & 16 deletions command/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type debugIndex struct {
Version int `json:"version"`
VaultAddress string `json:"vault_address"`
ClientVersion string `json:"client_version"`
ServerVersion string `json:"server_version"`
Timestamp time.Time `json:"timestamp"`
DurationSeconds int `json:"duration_seconds"`
IntervalSeconds int `json:"interval_seconds"`
Expand Down Expand Up @@ -245,6 +246,7 @@ func (c *DebugCommand) Run(args []string) int {
c.UI.Output("==> Starting debug capture...")
c.UI.Info(fmt.Sprintf(" Vault Address: %s", c.debugIndex.VaultAddress))
c.UI.Info(fmt.Sprintf(" Client Version: %s", c.debugIndex.ClientVersion))
c.UI.Info(fmt.Sprintf(" Server Version: %s", c.debugIndex.ServerVersion))
c.UI.Info(fmt.Sprintf(" Duration: %s", c.flagDuration))
c.UI.Info(fmt.Sprintf(" Interval: %s", c.flagInterval))
c.UI.Info(fmt.Sprintf(" Metrics Interval: %s", c.flagMetricsInterval))
Expand Down Expand Up @@ -412,9 +414,20 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
if err != nil {
return "", fmt.Errorf("unable to create client to connect to Vault: %s", err)
}
if _, err := client.Sys().Health(); err != nil {
serverHealth, err := client.Sys().Health()
if err != nil {
return "", fmt.Errorf("unable to connect to the server: %s", err)
}

// If the server is a DR secondary, drop any requested targets that
// cannot be collected there due to endpoint restrictions.
if serverHealth.ReplicationDRMode == "secondary" {
invalidDRTargets := strutil.Difference(c.flagTargets, c.validDRSecondaryTargets(), true)
if len(invalidDRTargets) != 0 {
c.UI.Info(fmt.Sprintf("Ignoring invalid targets for DR Secondary: %s", strings.Join(invalidDRTargets, ", ")))
c.flagTargets = strutil.Difference(c.flagTargets, invalidDRTargets, true)
}
}
c.cachedClient = client

captureTime := time.Now().UTC()
Expand Down Expand Up @@ -469,6 +482,7 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
c.debugIndex = &debugIndex{
VaultAddress: client.Address(),
ClientVersion: version.GetVersion().VersionNumber(),
ServerVersion: serverHealth.Version,
Compress: c.flagCompress,
DurationSeconds: int(c.flagDuration.Seconds()),
IntervalSeconds: int(c.flagInterval.Seconds()),
Expand All @@ -487,6 +501,10 @@ func (c *DebugCommand) defaultTargets() []string {
return []string{"config", "host", "requests", "metrics", "pprof", "replication-status", "server-status", "log"}
}

// validDRSecondaryTargets lists the capture targets that preflight keeps
// when the server's health response reports a replication DR mode of
// "secondary"; any other requested target is dropped for such a server.
func (c *DebugCommand) validDRSecondaryTargets() []string {
	drTargets := []string{
		"metrics",
		"replication-status",
		"server-status",
	}
	return drTargets
}

func (c *DebugCommand) captureStaticTargets() error {
// Capture configuration state
if strutil.StrListContains(c.flagTargets, "config") {
Expand Down Expand Up @@ -686,21 +704,6 @@ func (c *DebugCommand) collectMetrics(ctx context.Context) {
c.logger.Info("capturing metrics", "count", idxCount)
idxCount++

healthStatus, err := c.cachedClient.Sys().Health()
if err != nil {
c.captureError("metrics", err)
continue
}

// Check replication status. We skip processing metrics if we're on
// a DR node, though non-perf standbys will fail if they aren't using
// unauthenticated_metrics_access.
switch {
case healthStatus.ReplicationDRMode == "secondary":
c.logger.Info("skipping metrics capture on DR secondary node")
continue
}

// Perform metrics request
r := c.cachedClient.NewRequest("GET", "/v1/sys/metrics")
resp, err := c.cachedClient.RawRequestWithContext(ctx, r)
Expand Down
3 changes: 3 additions & 0 deletions website/content/docs/commands/debug.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ pertains to the local node and the request should not be forwarded.
Additionally, host information is not available on the OpenBSD platform due to
library limitations in fetching the data without enabling `cgo`.

[Enterprise] Telemetry can be gathered from a DR Secondary active node via the
`metrics` target if [unauthenticated metrics access](/docs/configuration/listener/tcp#unauthenticated_metrics_access) is enabled.
davidadeleon marked this conversation as resolved.
Show resolved Hide resolved

## Output Layout

The output of the bundled information, once decompressed, is contained within a
Expand Down