Skip to content

Commit

Permalink
Add DR Metric scraping capability to debug command (#15316) (#15395)
Browse files Browse the repository at this point in the history
* Add server information as well as ability to collect metrics from DR secondary

* Update debug docs

Adding additional information around ability to gather metrics from DR secondary

* Fix broken link in updated doc

* Create 15316.txt

Create changelog entry

* Fix Formatting

* Update website/content/docs/commands/debug.mdx

Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>

* Update changelog/15316.txt

Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>

* Trigger Build

Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>

Co-authored-by: Jason O'Donnell <2160810+jasonodonnell@users.noreply.github.com>
  • Loading branch information
davidadeleon and jasonodonnell committed May 17, 2022
1 parent 78a069d commit 3f99b76
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 16 deletions.
3 changes: 3 additions & 0 deletions changelog/15316.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
cli/debug: added support for retrieving metrics from DR clusters if `unauthenticated_metrics_access` is enabled
```
35 changes: 19 additions & 16 deletions command/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type debugIndex struct {
Version int `json:"version"`
VaultAddress string `json:"vault_address"`
ClientVersion string `json:"client_version"`
ServerVersion string `json:"server_version"`
Timestamp time.Time `json:"timestamp"`
DurationSeconds int `json:"duration_seconds"`
IntervalSeconds int `json:"interval_seconds"`
Expand Down Expand Up @@ -245,6 +246,7 @@ func (c *DebugCommand) Run(args []string) int {
c.UI.Output("==> Starting debug capture...")
c.UI.Info(fmt.Sprintf(" Vault Address: %s", c.debugIndex.VaultAddress))
c.UI.Info(fmt.Sprintf(" Client Version: %s", c.debugIndex.ClientVersion))
c.UI.Info(fmt.Sprintf(" Server Version: %s", c.debugIndex.ServerVersion))
c.UI.Info(fmt.Sprintf(" Duration: %s", c.flagDuration))
c.UI.Info(fmt.Sprintf(" Interval: %s", c.flagInterval))
c.UI.Info(fmt.Sprintf(" Metrics Interval: %s", c.flagMetricsInterval))
Expand Down Expand Up @@ -412,9 +414,20 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
if err != nil {
return "", fmt.Errorf("unable to create client to connect to Vault: %s", err)
}
if _, err := client.Sys().Health(); err != nil {
serverHealth, err := client.Sys().Health()
if err != nil {
return "", fmt.Errorf("unable to connect to the server: %s", err)
}

// If the server is a DR Secondary, ignore any targets that are not
// available there due to endpoint restrictions
if serverHealth.ReplicationDRMode == "secondary" {
invalidDRTargets := strutil.Difference(c.flagTargets, c.validDRSecondaryTargets(), true)
if len(invalidDRTargets) != 0 {
c.UI.Info(fmt.Sprintf("Ignoring invalid targets for DR Secondary: %s", strings.Join(invalidDRTargets, ", ")))
c.flagTargets = strutil.Difference(c.flagTargets, invalidDRTargets, true)
}
}
c.cachedClient = client

captureTime := time.Now().UTC()
Expand Down Expand Up @@ -469,6 +482,7 @@ func (c *DebugCommand) preflight(rawArgs []string) (string, error) {
c.debugIndex = &debugIndex{
VaultAddress: client.Address(),
ClientVersion: version.GetVersion().VersionNumber(),
ServerVersion: serverHealth.Version,
Compress: c.flagCompress,
DurationSeconds: int(c.flagDuration.Seconds()),
IntervalSeconds: int(c.flagInterval.Seconds()),
Expand All @@ -487,6 +501,10 @@ func (c *DebugCommand) defaultTargets() []string {
return []string{"config", "host", "requests", "metrics", "pprof", "replication-status", "server-status", "log"}
}

// validDRSecondaryTargets returns the names of the capture targets that are
// kept when the server reports itself as a DR secondary. A new slice is built
// on every call, so callers may modify the result freely.
func (c *DebugCommand) validDRSecondaryTargets() []string {
	drTargets := make([]string, 0, 3)
	drTargets = append(drTargets, "metrics", "replication-status", "server-status")
	return drTargets
}

func (c *DebugCommand) captureStaticTargets() error {
// Capture configuration state
if strutil.StrListContains(c.flagTargets, "config") {
Expand Down Expand Up @@ -686,21 +704,6 @@ func (c *DebugCommand) collectMetrics(ctx context.Context) {
c.logger.Info("capturing metrics", "count", idxCount)
idxCount++

healthStatus, err := c.cachedClient.Sys().Health()
if err != nil {
c.captureError("metrics", err)
continue
}

// Check replication status. We skip processing metrics if we're on
// a DR node, though non-perf standbys will fail if they aren't using
// unauthenticated_metrics_access.
switch {
case healthStatus.ReplicationDRMode == "secondary":
c.logger.Info("skipping metrics capture on DR secondary node")
continue
}

// Perform metrics request
r := c.cachedClient.NewRequest("GET", "/v1/sys/metrics")
resp, err := c.cachedClient.RawRequestWithContext(ctx, r)
Expand Down
3 changes: 3 additions & 0 deletions website/content/docs/commands/debug.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ pertains to the local node and the request should not be forwarded.
Additionally, host information is not available on the OpenBSD platform due to
library limitations in fetching the data without enabling `cgo`.

[Enterprise] Telemetry can be gathered from a DR Secondary active node via the
`metrics` target if [unauthenticated_metrics_access](/docs/configuration/listener/tcp#unauthenticated_metrics_access) is enabled.

## Output Layout

The output of the bundled information, once decompressed, is contained within a
Expand Down

0 comments on commit 3f99b76

Please sign in to comment.