Skip to content

Commit

Permalink
instrumented gate: observe permitted and non-permitted queries separately (#512)
Browse files Browse the repository at this point in the history

* instrumented gate: observe permitted and non-permitted queries separately

This distinction helps when debugging increased congestion on a gate.

* Add separation between cancelled and deadline exceeded errors

* Add changelog entry

* Remove timeout gate

* Remove ErrTimeout
  • Loading branch information
dimitarvdimitrov committed Apr 3, 2024
1 parent efaf529 commit 1435abf
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@
* `gate_duration_seconds`
* `kv_request_duration_seconds`
* `operation_duration_seconds`
* [ENHANCEMENT] Add `outcome` label to `gate_duration_seconds` metric. Possible values are `rejected_canceled`, `rejected_deadline_exceeded`, `rejected_other`, and `permitted`. #512
* [BUGFIX] spanlogger: Support multiple tenant IDs. #59
* [BUGFIX] Memberlist: fixed corrupted packets when sending compound messages with more than 255 messages or messages bigger than 64KB. #85
* [BUGFIX] Ring: `ring_member_ownership_percent` and `ring_tokens_owned` metrics are not updated on scale down. #109
Expand Down
20 changes: 14 additions & 6 deletions gate/gate.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,15 @@ func NewInstrumented(reg prometheus.Registerer, maxConcurrent int, gate Gate) Ga
Name: "gate_queries_in_flight",
Help: "Number of queries that are currently in flight.",
}),
duration: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
duration: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
Name: "gate_duration_seconds",
Help: "How many seconds it took for queries to wait at the gate.",
Buckets: []float64{0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120, 240, 360, 720},
// Use defaults recommended by Prometheus for native histograms.
NativeHistogramBucketFactor: 1.1,
NativeHistogramMaxBucketNumber: 100,
NativeHistogramMinResetDuration: time.Hour,
}),
}, []string{"outcome"}),
}

g.max.Set(float64(maxConcurrent))
Expand All @@ -84,20 +84,28 @@ type instrumentedGate struct {

max prometheus.Gauge
inflight prometheus.Gauge
duration prometheus.Histogram
duration *prometheus.HistogramVec
}

// Start delegates to the wrapped gate's Start and records how long that call
// took in the duration histogram, labelled by outcome. When the wrapped gate
// returns an error, the error is classified into one of the "rejected_*"
// outcomes and returned unchanged. Otherwise the wait is recorded as
// "permitted", the in-flight gauge is incremented, and nil is returned.
func (g *instrumentedGate) Start(ctx context.Context) error {
start := time.Now()
// NOTE(review): this deferred, unlabelled Observe appears to be the removed
// (pre-change) side of the diff shown without +/- markers. The labelled
// observations below record the same wait, so keeping both would count each
// call twice — confirm against the actual file.
defer func() {
g.duration.Observe(time.Since(start).Seconds())
}()

err := g.gate.Start(ctx)
if err != nil {
// Map the error to an outcome label so rejected waits can be told apart
// from permitted ones. errors.Is also matches errors that wrap the
// context sentinels.
var reason string
switch {
case errors.Is(err, context.Canceled):
reason = "rejected_canceled"
case errors.Is(err, context.DeadlineExceeded):
reason = "rejected_deadline_exceeded"
default:
// Any error that is neither a cancellation nor a deadline expiry.
reason = "rejected_other"
}
g.duration.WithLabelValues(reason).Observe(time.Since(start).Seconds())
return err
}

// The wrapped gate returned no error: record the wait, then count the query
// as in flight.
g.duration.WithLabelValues("permitted").Observe(time.Since(start).Seconds())
g.inflight.Inc()
return nil
}
Expand Down

0 comments on commit 1435abf

Please sign in to comment.